# R Markdown

#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)                                                  
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
## 
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
## 
##     nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
## 
##     compare
## The following object is masked from 'package:class':
## 
##     knn
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8    2023-06-11
#install.packages('ggplot2')
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## The following object is masked from 'package:kernlab':
## 
##     alpha
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
##   options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%()       masks igraph::%--%()
## ✖ ggplot2::alpha()        masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine()        masks randomForest::combine()
## ✖ purrr::compose()        masks igraph::compose()
## ✖ purrr::cross()          masks kernlab::cross()
## ✖ tidyr::crossing()       masks igraph::crossing()
## ✖ tidyr::expand()         masks Matrix::expand()
## ✖ dplyr::filter()         masks stats::filter()
## ✖ dplyr::lag()            masks stats::lag()
## ✖ ggplot2::margin()       masks randomForest::margin()
## ✖ purrr::none()           masks locfit::none()
## ✖ tidyr::pack()           masks Matrix::pack()
## ✖ purrr::simplify()       masks igraph::simplify()
## ✖ tidyr::unpack()         masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
## 
## The following objects are masked from 'package:rstanarm':
## 
##     compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2024 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
## 
## Attaching package: 'TDA'
## 
## The following object is masked from 'package:cluster':
## 
##     silhouette
library(TDAstats)
library(ks)
## 
## Attaching package: 'ks'
## 
## The following object is masked from 'package:TDA':
## 
##     kde
## 
## The following object is masked from 'package:MCMCpack':
## 
##     vech
## 
## The following object is masked from 'package:igraph':
## 
##     compare
## 
## The following object is masked from 'package:BayesFactor':
## 
##     compare
#install.packages('MLmetrics')
library(MLmetrics)
## 
## Attaching package: 'MLmetrics'
## 
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## 
## The following object is masked from 'package:base':
## 
##     Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
#import adult dataset from UCI repository stored on my desktop

#Adult **
# UCI Adult (census income) dataset: 32,561 rows x 15 unnamed columns (V1-V15),
# no header row in the raw .data file.
adult <- read.csv("~/Desktop/NCU/DissertationDatasets/Adult/adult.data", header=FALSE)
# str() prints its structure summary as a side effect and returns NULL
# invisibly, so the original head(str(adult)) only echoed a stray "NULL";
# call str() directly instead.
str(adult)
## 'data.frame':    32561 obs. of  15 variables:
##  $ V1 : int  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2 : chr  " State-gov" " Self-emp-not-inc" " Private" " Private" ...
##  $ V3 : int  77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
##  $ V4 : chr  " Bachelors" " Bachelors" " HS-grad" " 11th" ...
##  $ V5 : int  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6 : chr  " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
##  $ V7 : chr  " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
##  $ V8 : chr  " Not-in-family" " Husband" " Not-in-family" " Husband" ...
##  $ V9 : chr  " White" " White" " White" " Black" ...
##  $ V10: chr  " Male" " Male" " Male" " Male" ...
##  $ V11: int  2174 0 0 0 0 0 0 0 14084 5178 ...
##  $ V12: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13: int  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14: chr  " United-States" " United-States" " United-States" " United-States" ...
##  $ V15: chr  " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
  summary(adult)
##        V1             V2                  V3               V4           
##  Min.   :17.00   Length:32561       Min.   :  12285   Length:32561      
##  1st Qu.:28.00   Class :character   1st Qu.: 117827   Class :character  
##  Median :37.00   Mode  :character   Median : 178356   Mode  :character  
##  Mean   :38.58                      Mean   : 189778                     
##  3rd Qu.:48.00                      3rd Qu.: 237051                     
##  Max.   :90.00                      Max.   :1484705                     
##        V5             V6                 V7                 V8           
##  Min.   : 1.00   Length:32561       Length:32561       Length:32561      
##  1st Qu.: 9.00   Class :character   Class :character   Class :character  
##  Median :10.00   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :10.08                                                           
##  3rd Qu.:12.00                                                           
##  Max.   :16.00                                                           
##       V9                V10                 V11             V12        
##  Length:32561       Length:32561       Min.   :    0   Min.   :   0.0  
##  Class :character   Class :character   1st Qu.:    0   1st Qu.:   0.0  
##  Mode  :character   Mode  :character   Median :    0   Median :   0.0  
##                                        Mean   : 1078   Mean   :  87.3  
##                                        3rd Qu.:    0   3rd Qu.:   0.0  
##                                        Max.   :99999   Max.   :4356.0  
##       V13            V14                V15           
##  Min.   : 1.00   Length:32561       Length:32561      
##  1st Qu.:40.00   Class :character   Class :character  
##  Median :40.00   Mode  :character   Mode  :character  
##  Mean   :40.44                                        
##  3rd Qu.:45.00                                        
##  Max.   :99.00
#Dry_Bean_Dataset **
library(readxl)
# Dry Bean dataset: 13,611 rows x 17 columns (16 numeric shape features plus a
# character Class label), read from an Excel workbook.
Dry_Bean_Dataset <- read_excel("~/Desktop/NCU/DissertationDatasets/DryBeanDataset/Dry_Bean_Dataset.xlsx")
# str() prints as a side effect and returns NULL invisibly; wrapping it in
# head() only echoed "NULL", so call it directly.
str(Dry_Bean_Dataset)
## tibble [13,611 × 17] (S3: tbl_df/tbl/data.frame)
##  $ Area           : num [1:13611] 28395 28734 29380 30008 30140 ...
##  $ Perimeter      : num [1:13611] 610 638 624 646 620 ...
##  $ MajorAxisLength: num [1:13611] 208 201 213 211 202 ...
##  $ MinorAxisLength: num [1:13611] 174 183 176 183 190 ...
##  $ AspectRation   : num [1:13611] 1.2 1.1 1.21 1.15 1.06 ...
##  $ Eccentricity   : num [1:13611] 0.55 0.412 0.563 0.499 0.334 ...
##  $ ConvexArea     : num [1:13611] 28715 29172 29690 30724 30417 ...
##  $ EquivDiameter  : num [1:13611] 190 191 193 195 196 ...
##  $ Extent         : num [1:13611] 0.764 0.784 0.778 0.783 0.773 ...
##  $ Solidity       : num [1:13611] 0.989 0.985 0.99 0.977 0.991 ...
##  $ roundness      : num [1:13611] 0.958 0.887 0.948 0.904 0.985 ...
##  $ Compactness    : num [1:13611] 0.913 0.954 0.909 0.928 0.971 ...
##  $ ShapeFactor1   : num [1:13611] 0.00733 0.00698 0.00724 0.00702 0.0067 ...
##  $ ShapeFactor2   : num [1:13611] 0.00315 0.00356 0.00305 0.00321 0.00366 ...
##  $ ShapeFactor3   : num [1:13611] 0.834 0.91 0.826 0.862 0.942 ...
##  $ ShapeFactor4   : num [1:13611] 0.999 0.998 0.999 0.994 0.999 ...
##  $ Class          : chr [1:13611] "SEKER" "SEKER" "SEKER" "SEKER" ...
## NULL
  summary(Dry_Bean_Dataset)
##       Area          Perimeter      MajorAxisLength MinorAxisLength
##  Min.   : 20420   Min.   : 524.7   Min.   :183.6   Min.   :122.5  
##  1st Qu.: 36328   1st Qu.: 703.5   1st Qu.:253.3   1st Qu.:175.8  
##  Median : 44652   Median : 794.9   Median :296.9   Median :192.4  
##  Mean   : 53048   Mean   : 855.3   Mean   :320.1   Mean   :202.3  
##  3rd Qu.: 61332   3rd Qu.: 977.2   3rd Qu.:376.5   3rd Qu.:217.0  
##  Max.   :254616   Max.   :1985.4   Max.   :738.9   Max.   :460.2  
##   AspectRation    Eccentricity      ConvexArea     EquivDiameter  
##  Min.   :1.025   Min.   :0.2190   Min.   : 20684   Min.   :161.2  
##  1st Qu.:1.432   1st Qu.:0.7159   1st Qu.: 36714   1st Qu.:215.1  
##  Median :1.551   Median :0.7644   Median : 45178   Median :238.4  
##  Mean   :1.583   Mean   :0.7509   Mean   : 53768   Mean   :253.1  
##  3rd Qu.:1.707   3rd Qu.:0.8105   3rd Qu.: 62294   3rd Qu.:279.4  
##  Max.   :2.430   Max.   :0.9114   Max.   :263261   Max.   :569.4  
##      Extent          Solidity        roundness       Compactness    
##  Min.   :0.5553   Min.   :0.9192   Min.   :0.4896   Min.   :0.6406  
##  1st Qu.:0.7186   1st Qu.:0.9857   1st Qu.:0.8321   1st Qu.:0.7625  
##  Median :0.7599   Median :0.9883   Median :0.8832   Median :0.8013  
##  Mean   :0.7497   Mean   :0.9871   Mean   :0.8733   Mean   :0.7999  
##  3rd Qu.:0.7869   3rd Qu.:0.9900   3rd Qu.:0.9169   3rd Qu.:0.8343  
##  Max.   :0.8662   Max.   :0.9947   Max.   :0.9907   Max.   :0.9873  
##   ShapeFactor1       ShapeFactor2        ShapeFactor3     ShapeFactor4   
##  Min.   :0.002778   Min.   :0.0005642   Min.   :0.4103   Min.   :0.9477  
##  1st Qu.:0.005900   1st Qu.:0.0011535   1st Qu.:0.5814   1st Qu.:0.9937  
##  Median :0.006645   Median :0.0016935   Median :0.6420   Median :0.9964  
##  Mean   :0.006564   Mean   :0.0017159   Mean   :0.6436   Mean   :0.9951  
##  3rd Qu.:0.007271   3rd Qu.:0.0021703   3rd Qu.:0.6960   3rd Qu.:0.9979  
##  Max.   :0.010451   Max.   :0.0036650   Max.   :0.9748   Max.   :0.9997  
##     Class          
##  Length:13611      
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
#Poker Hand **
# UCI Poker Hand training set: 25,010 rows x 11 integer columns
# (5 suit/rank pairs plus the hand class), no header row.
poker.hand.training.true <- read.csv("~/Downloads/poker+hand/poker-hand-training-true.data", header=FALSE)
# str() returns NULL invisibly; the original head(str(...)) just printed
# "NULL", so call str() directly.
str(poker.hand.training.true)
## 'data.frame':    25010 obs. of  11 variables:
##  $ V1 : int  1 2 3 4 4 1 1 2 3 4 ...
##  $ V2 : int  10 11 12 10 1 2 9 1 5 1 ...
##  $ V3 : int  1 2 3 4 4 1 1 2 3 4 ...
##  $ V4 : int  11 13 11 11 13 4 12 2 6 4 ...
##  $ V5 : int  1 2 3 4 4 1 1 2 3 4 ...
##  $ V6 : int  13 10 13 1 12 5 10 3 9 2 ...
##  $ V7 : int  1 2 3 4 4 1 1 2 3 4 ...
##  $ V8 : int  12 12 10 13 11 3 11 4 7 3 ...
##  $ V9 : int  1 2 3 4 4 1 1 2 3 4 ...
##  $ V10: int  1 1 1 12 10 6 13 5 8 5 ...
##  $ V11: int  9 9 9 9 9 8 8 8 8 8 ...
## NULL
#Diabetes 130-US hospitals dataset
# Diabetes 130-US hospitals dataset (1999-2008): 101,766 encounters x 50
# columns; missing values are encoded as the literal string "?" in the CSV.
diabetic_data <- read.csv("~/Desktop/NCU/DissertationDatasets/diabetes+130-us+hospitals+for+years+1999-2008/diabetic_data.csv", header=TRUE)
# str() returns NULL invisibly; the original head(str(...)) just printed
# "NULL", so call str() directly.
str(diabetic_data)
## 'data.frame':    101766 obs. of  50 variables:
##  $ encounter_id            : int  2278392 149190 64410 500364 16680 35754 55842 63768 12522 15738 ...
##  $ patient_nbr             : int  8222157 55629189 86047875 82442376 42519267 82637451 84259809 114882984 48330783 63555939 ...
##  $ race                    : chr  "Caucasian" "Caucasian" "AfricanAmerican" "Caucasian" ...
##  $ gender                  : chr  "Female" "Female" "Female" "Male" ...
##  $ age                     : chr  "[0-10)" "[10-20)" "[20-30)" "[30-40)" ...
##  $ weight                  : chr  "?" "?" "?" "?" ...
##  $ admission_type_id       : int  6 1 1 1 1 2 3 1 2 3 ...
##  $ discharge_disposition_id: int  25 1 1 1 1 1 1 1 1 3 ...
##  $ admission_source_id     : int  1 7 7 7 7 2 2 7 4 4 ...
##  $ time_in_hospital        : int  1 3 2 2 1 3 4 5 13 12 ...
##  $ payer_code              : chr  "?" "?" "?" "?" ...
##  $ medical_specialty       : chr  "Pediatrics-Endocrinology" "?" "?" "?" ...
##  $ num_lab_procedures      : int  41 59 11 44 51 31 70 73 68 33 ...
##  $ num_procedures          : int  0 0 5 1 0 6 1 0 2 3 ...
##  $ num_medications         : int  1 18 13 16 8 16 21 12 28 18 ...
##  $ number_outpatient       : int  0 0 2 0 0 0 0 0 0 0 ...
##  $ number_emergency        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ number_inpatient        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ diag_1                  : chr  "250.83" "276" "648" "8" ...
##  $ diag_2                  : chr  "?" "250.01" "250" "250.43" ...
##  $ diag_3                  : chr  "?" "255" "V27" "403" ...
##  $ number_diagnoses        : int  1 9 6 7 5 9 7 8 8 8 ...
##  $ max_glu_serum           : chr  "None" "None" "None" "None" ...
##  $ A1Cresult               : chr  "None" "None" "None" "None" ...
##  $ metformin               : chr  "No" "No" "No" "No" ...
##  $ repaglinide             : chr  "No" "No" "No" "No" ...
##  $ nateglinide             : chr  "No" "No" "No" "No" ...
##  $ chlorpropamide          : chr  "No" "No" "No" "No" ...
##  $ glimepiride             : chr  "No" "No" "No" "No" ...
##  $ acetohexamide           : chr  "No" "No" "No" "No" ...
##  $ glipizide               : chr  "No" "No" "Steady" "No" ...
##  $ glyburide               : chr  "No" "No" "No" "No" ...
##  $ tolbutamide             : chr  "No" "No" "No" "No" ...
##  $ pioglitazone            : chr  "No" "No" "No" "No" ...
##  $ rosiglitazone           : chr  "No" "No" "No" "No" ...
##  $ acarbose                : chr  "No" "No" "No" "No" ...
##  $ miglitol                : chr  "No" "No" "No" "No" ...
##  $ troglitazone            : chr  "No" "No" "No" "No" ...
##  $ tolazamide              : chr  "No" "No" "No" "No" ...
##  $ examide                 : chr  "No" "No" "No" "No" ...
##  $ citoglipton             : chr  "No" "No" "No" "No" ...
##  $ insulin                 : chr  "No" "Up" "No" "Up" ...
##  $ glyburide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glipizide.metformin     : chr  "No" "No" "No" "No" ...
##  $ glimepiride.pioglitazone: chr  "No" "No" "No" "No" ...
##  $ metformin.rosiglitazone : chr  "No" "No" "No" "No" ...
##  $ metformin.pioglitazone  : chr  "No" "No" "No" "No" ...
##  $ change                  : chr  "No" "Ch" "No" "Ch" ...
##  $ diabetesMed             : chr  "No" "Yes" "Yes" "Yes" ...
##  $ readmitted              : chr  "NO" ">30" "NO" "NO" ...
## NULL
#Taiwanese Bankruptcy Prediction **
# Taiwanese Bankruptcy Prediction dataset: 6,819 rows x 96 columns
# (binary Bankrupt. outcome plus 95 financial ratios).
# NOTE(review): the name `data` shadows utils::data(); consider a more
# descriptive name (e.g. bankruptcy) in later refactoring.
data <- read.csv("~/Desktop/NCU/DissertationDatasets/data.csv")
# str() returns NULL invisibly; the original head(str(...)) just printed
# "NULL", so call str() directly.
str(data)
## 'data.frame':    6819 obs. of  96 variables:
##  $ Bankrupt.                                              : int  1 1 1 1 1 1 0 0 0 0 ...
##  $ ROA.C..before.interest.and.depreciation.before.interest: num  0.371 0.464 0.426 0.4 0.465 ...
##  $ ROA.A..before.interest.and...after.tax                 : num  0.424 0.538 0.499 0.451 0.538 ...
##  $ ROA.B..before.interest.and.depreciation.after.tax      : num  0.406 0.517 0.472 0.458 0.522 ...
##  $ Operating.Gross.Margin                                 : num  0.601 0.61 0.601 0.584 0.599 ...
##  $ Realized.Sales.Gross.Margin                            : num  0.601 0.61 0.601 0.584 0.599 ...
##  $ Operating.Profit.Rate                                  : num  0.999 0.999 0.999 0.999 0.999 ...
##  $ Pre.tax.net.Interest.Rate                              : num  0.797 0.797 0.796 0.797 0.797 ...
##  $ After.tax.net.Interest.Rate                            : num  0.809 0.809 0.808 0.809 0.809 ...
##  $ Non.industry.income.and.expenditure.revenue            : num  0.303 0.304 0.302 0.303 0.303 ...
##  $ Continuous.interest.rate..after.tax.                   : num  0.781 0.782 0.78 0.781 0.782 ...
##  $ Operating.Expense.Rate                                 : num  1.26e-04 2.90e-04 2.36e-04 1.08e-04 7.89e+09 ...
##  $ Research.and.development.expense.rate                  : num  0.00 0.00 2.55e+07 0.00 0.00 0.00 7.30e+08 5.09e+07 0.00 0.00 ...
##  $ Cash.flow.rate                                         : num  0.458 0.462 0.459 0.466 0.463 ...
##  $ Interest.bearing.debt.interest.rate                    : num  0.000725 0.000647 0.00079 0.000449 0.000686 ...
##  $ Tax.rate..A.                                           : num  0 0 0 0 0 ...
##  $ Net.Value.Per.Share..B.                                : num  0.148 0.182 0.178 0.154 0.168 ...
##  $ Net.Value.Per.Share..A.                                : num  0.148 0.182 0.178 0.154 0.168 ...
##  $ Net.Value.Per.Share..C.                                : num  0.148 0.182 0.194 0.154 0.168 ...
##  $ Persistent.EPS.in.the.Last.Four.Seasons                : num  0.169 0.209 0.181 0.194 0.213 ...
##  $ Cash.Flow.Per.Share                                    : num  0.312 0.318 0.307 0.322 0.319 ...
##  $ Revenue.Per.Share..Yuan...                             : num  0.01756 0.02114 0.00594 0.01437 0.02969 ...
##  $ Operating.Profit.Per.Share..Yuan...                    : num  0.0959 0.0937 0.0923 0.0778 0.0969 ...
##  $ Per.Share.Net.profit.before.tax..Yuan...               : num  0.139 0.17 0.143 0.149 0.168 ...
##  $ Realized.Sales.Gross.Profit.Growth.Rate                : num  0.0221 0.0221 0.0228 0.022 0.0221 ...
##  $ Operating.Profit.Growth.Rate                           : num  0.848 0.848 0.848 0.848 0.848 ...
##  $ After.tax.Net.Profit.Growth.Rate                       : num  0.689 0.69 0.689 0.689 0.69 ...
##  $ Regular.Net.Profit.Growth.Rate                         : num  0.689 0.69 0.689 0.689 0.69 ...
##  $ Continuous.Net.Profit.Growth.Rate                      : num  0.218 0.218 0.218 0.218 0.218 ...
##  $ Total.Asset.Growth.Rate                                : num  4.98e+09 6.11e+09 7.28e+09 4.88e+09 5.51e+09 6.08e+08 5.72e+09 6.63e+09 6.89e+09 5.55e+09 ...
##  $ Net.Value.Growth.Rate                                  : num  0.000327 0.000443 0.000396 0.000382 0.000439 ...
##  $ Total.Asset.Return.Growth.Rate.Ratio                   : num  0.263 0.265 0.264 0.263 0.265 ...
##  $ Cash.Reinvestment..                                    : num  0.364 0.377 0.369 0.384 0.38 ...
##  $ Current.Ratio                                          : num  0.00226 0.00602 0.01154 0.00419 0.00602 ...
##  $ Quick.Ratio                                            : num  0.00121 0.00404 0.00535 0.0029 0.00373 ...
##  $ Interest.Expense.Ratio                                 : num  0.63 0.635 0.63 0.63 0.636 ...
##  $ Total.debt.Total.net.worth                             : num  0.02127 0.0125 0.02125 0.00957 0.00515 ...
##  $ Debt.ratio..                                           : num  0.208 0.171 0.208 0.151 0.107 ...
##  $ Net.worth.Assets                                       : num  0.792 0.829 0.792 0.849 0.893 ...
##  $ Long.term.fund.suitability.ratio..A.                   : num  0.00502 0.00506 0.0051 0.00505 0.0053 ...
##  $ Borrowing.dependency                                   : num  0.39 0.377 0.379 0.38 0.375 ...
##  $ Contingent.liabilities.Net.worth                       : num  0.00648 0.00584 0.00656 0.00537 0.00662 ...
##  $ Operating.profit.Paid.in.capital                       : num  0.0959 0.0937 0.0923 0.0777 0.0969 ...
##  $ Net.profit.before.tax.Paid.in.capital                  : num  0.138 0.169 0.148 0.148 0.167 ...
##  $ Inventory.and.accounts.receivable.Net.value            : num  0.398 0.398 0.407 0.398 0.4 ...
##  $ Total.Asset.Turnover                                   : num  0.087 0.0645 0.015 0.09 0.1754 ...
##  $ Accounts.Receivable.Turnover                           : num  0.00181 0.00129 0.0015 0.00197 0.00145 ...
##  $ Average.Collection.Days                                : num  0.00349 0.00492 0.00423 0.00321 0.00437 ...
##  $ Inventory.Turnover.Rate..times.                        : num  1.82e-04 9.36e+09 6.50e+07 7.13e+09 1.63e-04 ...
##  $ Fixed.Assets.Turnover.Frequency                        : num  1.17e-04 7.19e+08 2.65e+09 9.15e+09 2.94e-04 ...
##  $ Net.Worth.Turnover.Rate..times.                        : num  0.0329 0.0255 0.0134 0.0281 0.0402 ...
##  $ Revenue.per.person                                     : num  0.03416 0.00689 0.029 0.01546 0.05811 ...
##  $ Operating.profit.per.person                            : num  0.393 0.392 0.382 0.378 0.394 ...
##  $ Allocation.rate.per.person                             : num  0.0371 0.0123 0.141 0.0213 0.024 ...
##  $ Working.Capital.to.Total.Assets                        : num  0.673 0.751 0.83 0.726 0.752 ...
##  $ Quick.Assets.Total.Assets                              : num  0.167 0.127 0.34 0.162 0.26 ...
##  $ Current.Assets.Total.Assets                            : num  0.191 0.182 0.603 0.226 0.358 ...
##  $ Cash.Total.Assets                                      : num  0.004094 0.014948 0.000991 0.018851 0.014161 ...
##  $ Quick.Assets.Current.Liability                         : num  0.002 0.00414 0.0063 0.00296 0.00427 ...
##  $ Cash.Current.Liability                                 : num  1.47e-04 1.38e-03 5.34e+09 1.01e-03 6.80e-04 ...
##  $ Current.Liability.to.Assets                            : num  0.1473 0.057 0.0982 0.0987 0.1102 ...
##  $ Operating.Funds.to.Liability                           : num  0.334 0.341 0.337 0.349 0.345 ...
##  $ Inventory.Working.Capital                              : num  0.277 0.29 0.277 0.277 0.288 ...
##  $ Inventory.Current.Liability                            : num  0.00104 0.00521 0.01388 0.00354 0.00487 ...
##  $ Current.Liabilities.Liability                          : num  0.676 0.309 0.446 0.616 0.975 ...
##  $ Working.Capital.Equity                                 : num  0.721 0.732 0.743 0.73 0.732 ...
##  $ Current.Liabilities.Equity                             : num  0.339 0.33 0.335 0.332 0.331 ...
##  $ Long.term.Liability.to.Current.Assets                  : num  0.02559 0.02395 0.00372 0.02217 0 ...
##  $ Retained.Earnings.to.Total.Assets                      : num  0.903 0.931 0.91 0.907 0.914 ...
##  $ Total.income.Total.expense                             : num  0.00202 0.00223 0.00206 0.00183 0.00222 ...
##  $ Total.expense.Assets                                   : num  0.0649 0.0255 0.0214 0.0242 0.0264 ...
##  $ Current.Asset.Turnover.Rate                            : num  7.01e+08 1.07e-04 1.79e-03 8.14e+09 6.68e+09 ...
##  $ Quick.Asset.Turnover.Rate                              : num  6.55e+09 7.70e+09 1.02e-03 6.05e+09 5.05e+09 ...
##  $ Working.capitcal.Turnover.Rate                         : num  0.594 0.594 0.595 0.594 0.594 ...
##  $ Cash.Turnover.Rate                                     : num  4.58e+08 2.49e+09 7.61e+08 2.03e+09 8.24e+08 ...
##  $ Cash.Flow.to.Sales                                     : num  0.672 0.672 0.672 0.672 0.672 ...
##  $ Fixed.Assets.to.Assets                                 : num  0.424 0.469 0.276 0.559 0.31 ...
##  $ Current.Liability.to.Liability                         : num  0.676 0.309 0.446 0.616 0.975 ...
##  $ Current.Liability.to.Equity                            : num  0.339 0.33 0.335 0.332 0.331 ...
##  $ Equity.to.Long.term.Liability                          : num  0.127 0.121 0.118 0.121 0.111 ...
##  $ Cash.Flow.to.Total.Assets                              : num  0.638 0.641 0.643 0.579 0.622 ...
##  $ Cash.Flow.to.Liability                                 : num  0.459 0.459 0.459 0.449 0.454 ...
##  $ CFO.to.Assets                                          : num  0.52 0.567 0.538 0.604 0.578 ...
##  $ Cash.Flow.to.Equity                                    : num  0.313 0.314 0.315 0.302 0.312 ...
##  $ Current.Liability.to.Current.Assets                    : num  0.1183 0.0478 0.0253 0.0672 0.0477 ...
##  $ Liability.Assets.Flag                                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Net.Income.to.Total.Assets                             : num  0.717 0.795 0.775 0.74 0.795 ...
##  $ Total.assets.to.GNP.price                              : num  0.00922 0.00832 0.04 0.00325 0.00388 ...
##  $ No.credit.Interval                                     : num  0.623 0.624 0.624 0.623 0.624 ...
##  $ Gross.Profit.to.Sales                                  : num  0.601 0.61 0.601 0.584 0.599 ...
##  $ Net.Income.to.Stockholder.s.Equity                     : num  0.828 0.84 0.837 0.835 0.84 ...
##  $ Liability.to.Equity                                    : num  0.29 0.284 0.29 0.282 0.279 ...
##  $ Degree.of.Financial.Leverage..DFL.                     : num  0.0266 0.2646 0.0266 0.0267 0.0248 ...
##  $ Interest.Coverage.Ratio..Interest.expense.to.EBIT.     : num  0.564 0.57 0.564 0.565 0.576 ...
##  $ Net.Income.Flag                                        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Equity.to.Liability                                    : num  0.0165 0.0208 0.0165 0.024 0.0355 ...
## NULL
   summary(data)
##    Bankrupt.       ROA.C..before.interest.and.depreciation.before.interest
##  Min.   :0.00000   Min.   :0.0000                                         
##  1st Qu.:0.00000   1st Qu.:0.4765                                         
##  Median :0.00000   Median :0.5027                                         
##  Mean   :0.03226   Mean   :0.5052                                         
##  3rd Qu.:0.00000   3rd Qu.:0.5356                                         
##  Max.   :1.00000   Max.   :1.0000                                         
##  ROA.A..before.interest.and...after.tax
##  Min.   :0.0000                        
##  1st Qu.:0.5355                        
##  Median :0.5598                        
##  Mean   :0.5586                        
##  3rd Qu.:0.5892                        
##  Max.   :1.0000                        
##  ROA.B..before.interest.and.depreciation.after.tax Operating.Gross.Margin
##  Min.   :0.0000                                    Min.   :0.0000        
##  1st Qu.:0.5273                                    1st Qu.:0.6004        
##  Median :0.5523                                    Median :0.6060        
##  Mean   :0.5536                                    Mean   :0.6079        
##  3rd Qu.:0.5841                                    3rd Qu.:0.6139        
##  Max.   :1.0000                                    Max.   :1.0000        
##  Realized.Sales.Gross.Margin Operating.Profit.Rate Pre.tax.net.Interest.Rate
##  Min.   :0.0000              Min.   :0.0000        Min.   :0.0000           
##  1st Qu.:0.6004              1st Qu.:0.9990        1st Qu.:0.7974           
##  Median :0.6060              Median :0.9990        Median :0.7975           
##  Mean   :0.6079              Mean   :0.9988        Mean   :0.7972           
##  3rd Qu.:0.6138              3rd Qu.:0.9991        3rd Qu.:0.7976           
##  Max.   :1.0000              Max.   :1.0000        Max.   :1.0000           
##  After.tax.net.Interest.Rate Non.industry.income.and.expenditure.revenue
##  Min.   :0.0000              Min.   :0.0000                             
##  1st Qu.:0.8093              1st Qu.:0.3035                             
##  Median :0.8094              Median :0.3035                             
##  Mean   :0.8091              Mean   :0.3036                             
##  3rd Qu.:0.8095              3rd Qu.:0.3036                             
##  Max.   :1.0000              Max.   :1.0000                             
##  Continuous.interest.rate..after.tax. Operating.Expense.Rate
##  Min.   :0.0000                       Min.   :0.000e+00     
##  1st Qu.:0.7816                       1st Qu.:0.000e+00     
##  Median :0.7816                       Median :0.000e+00     
##  Mean   :0.7814                       Mean   :1.995e+09     
##  3rd Qu.:0.7817                       3rd Qu.:4.145e+09     
##  Max.   :1.0000                       Max.   :9.990e+09     
##  Research.and.development.expense.rate Cash.flow.rate  
##  Min.   :0.00e+00                      Min.   :0.0000  
##  1st Qu.:0.00e+00                      1st Qu.:0.4616  
##  Median :5.09e+08                      Median :0.4651  
##  Mean   :1.95e+09                      Mean   :0.4674  
##  3rd Qu.:3.45e+09                      3rd Qu.:0.4710  
##  Max.   :9.98e+09                      Max.   :1.0000  
##  Interest.bearing.debt.interest.rate  Tax.rate..A.     Net.Value.Per.Share..B.
##  Min.   :        0                   Min.   :0.00000   Min.   :0.0000         
##  1st Qu.:        0                   1st Qu.:0.00000   1st Qu.:0.1736         
##  Median :        0                   Median :0.07349   Median :0.1844         
##  Mean   : 16448013                   Mean   :0.11500   Mean   :0.1907         
##  3rd Qu.:        0                   3rd Qu.:0.20584   3rd Qu.:0.1996         
##  Max.   :990000000                   Max.   :1.00000   Max.   :1.0000         
##  Net.Value.Per.Share..A. Net.Value.Per.Share..C.
##  Min.   :0.0000          Min.   :0.0000         
##  1st Qu.:0.1736          1st Qu.:0.1737         
##  Median :0.1844          Median :0.1844         
##  Mean   :0.1906          Mean   :0.1907         
##  3rd Qu.:0.1996          3rd Qu.:0.1996         
##  Max.   :1.0000          Max.   :1.0000         
##  Persistent.EPS.in.the.Last.Four.Seasons Cash.Flow.Per.Share
##  Min.   :0.0000                          Min.   :0.0000     
##  1st Qu.:0.2147                          1st Qu.:0.3177     
##  Median :0.2245                          Median :0.3225     
##  Mean   :0.2288                          Mean   :0.3235     
##  3rd Qu.:0.2388                          3rd Qu.:0.3286     
##  Max.   :1.0000                          Max.   :1.0000     
##  Revenue.Per.Share..Yuan... Operating.Profit.Per.Share..Yuan...
##  Min.   :0.000e+00          Min.   :0.00000                    
##  1st Qu.:0.000e+00          1st Qu.:0.09608                    
##  Median :0.000e+00          Median :0.10423                    
##  Mean   :1.329e+06          Mean   :0.10909                    
##  3rd Qu.:0.000e+00          3rd Qu.:0.11615                    
##  Max.   :3.020e+09          Max.   :1.00000                    
##  Per.Share.Net.profit.before.tax..Yuan...
##  Min.   :0.0000                          
##  1st Qu.:0.1704                          
##  Median :0.1797                          
##  Mean   :0.1844                          
##  3rd Qu.:0.1935                          
##  Max.   :1.0000                          
##  Realized.Sales.Gross.Profit.Growth.Rate Operating.Profit.Growth.Rate
##  Min.   :0.00000                         Min.   :0.0000              
##  1st Qu.:0.02206                         1st Qu.:0.8480              
##  Median :0.02210                         Median :0.8480              
##  Mean   :0.02241                         Mean   :0.8480              
##  3rd Qu.:0.02215                         3rd Qu.:0.8481              
##  Max.   :1.00000                         Max.   :1.0000              
##  After.tax.Net.Profit.Growth.Rate Regular.Net.Profit.Growth.Rate
##  Min.   :0.0000                   Min.   :0.0000                
##  1st Qu.:0.6893                   1st Qu.:0.6893                
##  Median :0.6894                   Median :0.6894                
##  Mean   :0.6891                   Mean   :0.6892                
##  3rd Qu.:0.6896                   3rd Qu.:0.6896                
##  Max.   :1.0000                   Max.   :1.0000                
##  Continuous.Net.Profit.Growth.Rate Total.Asset.Growth.Rate
##  Min.   :0.0000                    Min.   :0.000e+00      
##  1st Qu.:0.2176                    1st Qu.:4.860e+09      
##  Median :0.2176                    Median :6.400e+09      
##  Mean   :0.2176                    Mean   :5.508e+09      
##  3rd Qu.:0.2176                    3rd Qu.:7.390e+09      
##  Max.   :1.0000                    Max.   :9.990e+09      
##  Net.Value.Growth.Rate Total.Asset.Return.Growth.Rate.Ratio Cash.Reinvestment..
##  Min.   :0.000e+00     Min.   :0.0000                       Min.   :0.0000     
##  1st Qu.:0.000e+00     1st Qu.:0.2638                       1st Qu.:0.3747     
##  Median :0.000e+00     Median :0.2640                       Median :0.3804     
##  Mean   :1.566e+06     Mean   :0.2642                       Mean   :0.3797     
##  3rd Qu.:0.000e+00     3rd Qu.:0.2644                       3rd Qu.:0.3867     
##  Max.   :9.330e+09     Max.   :1.0000                       Max.   :1.0000     
##  Current.Ratio        Quick.Ratio        Interest.Expense.Ratio
##  Min.   :0.000e+00   Min.   :0.000e+00   Min.   :0.0000        
##  1st Qu.:0.000e+00   1st Qu.:0.000e+00   1st Qu.:0.6306        
##  Median :0.000e+00   Median :0.000e+00   Median :0.6307        
##  Mean   :4.033e+05   Mean   :8.377e+06   Mean   :0.6310        
##  3rd Qu.:0.000e+00   3rd Qu.:0.000e+00   3rd Qu.:0.6311        
##  Max.   :2.750e+09   Max.   :9.230e+09   Max.   :1.0000        
##  Total.debt.Total.net.worth  Debt.ratio..     Net.worth.Assets
##  Min.   :0.000e+00          Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.000e+00          1st Qu.:0.07289   1st Qu.:0.8512  
##  Median :0.000e+00          Median :0.11141   Median :0.8886  
##  Mean   :4.416e+06          Mean   :0.11318   Mean   :0.8868  
##  3rd Qu.:0.000e+00          3rd Qu.:0.14880   3rd Qu.:0.9271  
##  Max.   :9.940e+09          Max.   :1.00000   Max.   :1.0000  
##  Long.term.fund.suitability.ratio..A. Borrowing.dependency
##  Min.   :0.000000                     Min.   :0.0000      
##  1st Qu.:0.005244                     1st Qu.:0.3702      
##  Median :0.005665                     Median :0.3726      
##  Mean   :0.008783                     Mean   :0.3747      
##  3rd Qu.:0.006847                     3rd Qu.:0.3763      
##  Max.   :1.000000                     Max.   :1.0000      
##  Contingent.liabilities.Net.worth Operating.profit.Paid.in.capital
##  Min.   :0.000000                 Min.   :0.0000                  
##  1st Qu.:0.005366                 1st Qu.:0.0961                  
##  Median :0.005366                 Median :0.1041                  
##  Mean   :0.005968                 Mean   :0.1090                  
##  3rd Qu.:0.005764                 3rd Qu.:0.1159                  
##  Max.   :1.000000                 Max.   :1.0000                  
##  Net.profit.before.tax.Paid.in.capital
##  Min.   :0.0000                       
##  1st Qu.:0.1694                       
##  Median :0.1785                       
##  Mean   :0.1827                       
##  3rd Qu.:0.1916                       
##  Max.   :1.0000                       
##  Inventory.and.accounts.receivable.Net.value Total.Asset.Turnover
##  Min.   :0.0000                              Min.   :0.00000     
##  1st Qu.:0.3974                              1st Qu.:0.07646     
##  Median :0.4001                              Median :0.11844     
##  Mean   :0.4025                              Mean   :0.14161     
##  3rd Qu.:0.4046                              3rd Qu.:0.17691     
##  Max.   :1.0000                              Max.   :1.00000     
##  Accounts.Receivable.Turnover Average.Collection.Days
##  Min.   :0.000e+00            Min.   :0.000e+00      
##  1st Qu.:0.000e+00            1st Qu.:0.000e+00      
##  Median :0.000e+00            Median :0.000e+00      
##  Mean   :1.279e+07            Mean   :9.826e+06      
##  3rd Qu.:0.000e+00            3rd Qu.:0.000e+00      
##  Max.   :9.740e+09            Max.   :9.730e+09      
##  Inventory.Turnover.Rate..times. Fixed.Assets.Turnover.Frequency
##  Min.   :0.000e+00               Min.   :0.000e+00              
##  1st Qu.:0.000e+00               1st Qu.:0.000e+00              
##  Median :0.000e+00               Median :0.000e+00              
##  Mean   :2.149e+09               Mean   :1.009e+09              
##  3rd Qu.:4.620e+09               3rd Qu.:0.000e+00              
##  Max.   :9.990e+09               Max.   :9.990e+09              
##  Net.Worth.Turnover.Rate..times. Revenue.per.person 
##  Min.   :0.00000                 Min.   :0.000e+00  
##  1st Qu.:0.02177                 1st Qu.:0.000e+00  
##  Median :0.02952                 Median :0.000e+00  
##  Mean   :0.03860                 Mean   :2.326e+06  
##  3rd Qu.:0.04290                 3rd Qu.:0.000e+00  
##  Max.   :1.00000                 Max.   :8.810e+09  
##  Operating.profit.per.person Allocation.rate.per.person
##  Min.   :0.0000              Min.   :0.000e+00         
##  1st Qu.:0.3924              1st Qu.:0.000e+00         
##  Median :0.3959              Median :0.000e+00         
##  Mean   :0.4007              Mean   :1.126e+07         
##  3rd Qu.:0.4019              3rd Qu.:0.000e+00         
##  Max.   :1.0000              Max.   :9.570e+09         
##  Working.Capital.to.Total.Assets Quick.Assets.Total.Assets
##  Min.   :0.0000                  Min.   :0.0000           
##  1st Qu.:0.7743                  1st Qu.:0.2420           
##  Median :0.8103                  Median :0.3865           
##  Mean   :0.8141                  Mean   :0.4001           
##  3rd Qu.:0.8504                  3rd Qu.:0.5406           
##  Max.   :1.0000                  Max.   :1.0000           
##  Current.Assets.Total.Assets Cash.Total.Assets Quick.Assets.Current.Liability
##  Min.   :0.0000              Min.   :0.00000   Min.   :0.000e+00             
##  1st Qu.:0.3528              1st Qu.:0.03354   1st Qu.:0.000e+00             
##  Median :0.5148              Median :0.07489   Median :0.000e+00             
##  Mean   :0.5223              Mean   :0.12409   Mean   :3.593e+06             
##  3rd Qu.:0.6891              3rd Qu.:0.16107   3rd Qu.:0.000e+00             
##  Max.   :1.0000              Max.   :1.00000   Max.   :8.820e+09             
##  Cash.Current.Liability Current.Liability.to.Assets
##  Min.   :0.000e+00      Min.   :0.00000            
##  1st Qu.:0.000e+00      1st Qu.:0.05330            
##  Median :0.000e+00      Median :0.08270            
##  Mean   :3.716e+07      Mean   :0.09067            
##  3rd Qu.:0.000e+00      3rd Qu.:0.11952            
##  Max.   :9.650e+09      Max.   :1.00000            
##  Operating.Funds.to.Liability Inventory.Working.Capital
##  Min.   :0.0000               Min.   :0.0000           
##  1st Qu.:0.3410               1st Qu.:0.2770           
##  Median :0.3486               Median :0.2772           
##  Mean   :0.3538               Mean   :0.2774           
##  3rd Qu.:0.3609               3rd Qu.:0.2774           
##  Max.   :1.0000               Max.   :1.0000           
##  Inventory.Current.Liability Current.Liabilities.Liability
##  Min.   :0.000e+00           Min.   :0.0000               
##  1st Qu.:0.000e+00           1st Qu.:0.6270               
##  Median :0.000e+00           Median :0.8069               
##  Mean   :5.581e+07           Mean   :0.7616               
##  3rd Qu.:0.000e+00           3rd Qu.:0.9420               
##  Max.   :9.910e+09           Max.   :1.0000               
##  Working.Capital.Equity Current.Liabilities.Equity
##  Min.   :0.0000         Min.   :0.0000            
##  1st Qu.:0.7336         1st Qu.:0.3281            
##  Median :0.7360         Median :0.3297            
##  Mean   :0.7358         Mean   :0.3314            
##  3rd Qu.:0.7386         3rd Qu.:0.3323            
##  Max.   :1.0000         Max.   :1.0000            
##  Long.term.Liability.to.Current.Assets Retained.Earnings.to.Total.Assets
##  Min.   :0.000e+00                     Min.   :0.0000                   
##  1st Qu.:0.000e+00                     1st Qu.:0.9311                   
##  Median :0.000e+00                     Median :0.9377                   
##  Mean   :5.416e+07                     Mean   :0.9347                   
##  3rd Qu.:0.000e+00                     3rd Qu.:0.9448                   
##  Max.   :9.540e+09                     Max.   :1.0000                   
##  Total.income.Total.expense Total.expense.Assets Current.Asset.Turnover.Rate
##  Min.   :0.000000           Min.   :0.00000      Min.   :0.000e+00          
##  1st Qu.:0.002236           1st Qu.:0.01457      1st Qu.:0.000e+00          
##  Median :0.002336           Median :0.02267      Median :0.000e+00          
##  Mean   :0.002549           Mean   :0.02918      Mean   :1.196e+09          
##  3rd Qu.:0.002492           3rd Qu.:0.03593      3rd Qu.:0.000e+00          
##  Max.   :1.000000           Max.   :1.00000      Max.   :1.000e+10          
##  Quick.Asset.Turnover.Rate Working.capitcal.Turnover.Rate Cash.Turnover.Rate 
##  Min.   :0.000e+00         Min.   :0.0000                 Min.   :0.000e+00  
##  1st Qu.:0.000e+00         1st Qu.:0.5939                 1st Qu.:0.000e+00  
##  Median :0.000e+00         Median :0.5940                 Median :1.080e+09  
##  Mean   :2.164e+09         Mean   :0.5940                 Mean   :2.472e+09  
##  3rd Qu.:4.900e+09         3rd Qu.:0.5940                 3rd Qu.:4.510e+09  
##  Max.   :1.000e+10         Max.   :1.0000                 Max.   :1.000e+10  
##  Cash.Flow.to.Sales Fixed.Assets.to.Assets Current.Liability.to.Liability
##  Min.   :0.0000     Min.   :0.00e+00       Min.   :0.0000                
##  1st Qu.:0.6716     1st Qu.:0.00e+00       1st Qu.:0.6270                
##  Median :0.6716     Median :0.00e+00       Median :0.8069                
##  Mean   :0.6715     Mean   :1.22e+06       Mean   :0.7616                
##  3rd Qu.:0.6716     3rd Qu.:0.00e+00       3rd Qu.:0.9420                
##  Max.   :1.0000     Max.   :8.32e+09       Max.   :1.0000                
##  Current.Liability.to.Equity Equity.to.Long.term.Liability
##  Min.   :0.0000              Min.   :0.0000               
##  1st Qu.:0.3281              1st Qu.:0.1109               
##  Median :0.3297              Median :0.1123               
##  Mean   :0.3314              Mean   :0.1156               
##  3rd Qu.:0.3323              3rd Qu.:0.1171               
##  Max.   :1.0000              Max.   :1.0000               
##  Cash.Flow.to.Total.Assets Cash.Flow.to.Liability CFO.to.Assets   
##  Min.   :0.0000            Min.   :0.0000         Min.   :0.0000  
##  1st Qu.:0.6333            1st Qu.:0.4571         1st Qu.:0.5660  
##  Median :0.6454            Median :0.4598         Median :0.5933  
##  Mean   :0.6497            Mean   :0.4618         Mean   :0.5934  
##  3rd Qu.:0.6631            3rd Qu.:0.4642         3rd Qu.:0.6248  
##  Max.   :1.0000            Max.   :1.0000         Max.   :1.0000  
##  Cash.Flow.to.Equity Current.Liability.to.Current.Assets Liability.Assets.Flag
##  Min.   :0.0000      Min.   :0.00000                     Min.   :0.000000     
##  1st Qu.:0.3130      1st Qu.:0.01803                     1st Qu.:0.000000     
##  Median :0.3150      Median :0.02760                     Median :0.000000     
##  Mean   :0.3156      Mean   :0.03151                     Mean   :0.001173     
##  3rd Qu.:0.3177      3rd Qu.:0.03837                     3rd Qu.:0.000000     
##  Max.   :1.0000      Max.   :1.00000                     Max.   :1.000000     
##  Net.Income.to.Total.Assets Total.assets.to.GNP.price No.credit.Interval
##  Min.   :0.0000             Min.   :0.000e+00         Min.   :0.0000    
##  1st Qu.:0.7967             1st Qu.:0.000e+00         1st Qu.:0.6236    
##  Median :0.8106             Median :0.000e+00         Median :0.6239    
##  Mean   :0.8078             Mean   :1.863e+07         Mean   :0.6239    
##  3rd Qu.:0.8265             3rd Qu.:0.000e+00         3rd Qu.:0.6242    
##  Max.   :1.0000             Max.   :9.820e+09         Max.   :1.0000    
##  Gross.Profit.to.Sales Net.Income.to.Stockholder.s.Equity Liability.to.Equity
##  Min.   :0.0000        Min.   :0.0000                     Min.   :0.0000     
##  1st Qu.:0.6004        1st Qu.:0.8401                     1st Qu.:0.2769     
##  Median :0.6060        Median :0.8412                     Median :0.2788     
##  Mean   :0.6079        Mean   :0.8404                     Mean   :0.2804     
##  3rd Qu.:0.6139        3rd Qu.:0.8424                     3rd Qu.:0.2814     
##  Max.   :1.0000        Max.   :1.0000                     Max.   :1.0000     
##  Degree.of.Financial.Leverage..DFL.
##  Min.   :0.00000                   
##  1st Qu.:0.02679                   
##  Median :0.02681                   
##  Mean   :0.02754                   
##  3rd Qu.:0.02691                   
##  Max.   :1.00000                   
##  Interest.Coverage.Ratio..Interest.expense.to.EBIT. Net.Income.Flag
##  Min.   :0.0000                                     Min.   :1      
##  1st Qu.:0.5652                                     1st Qu.:1      
##  Median :0.5653                                     Median :1      
##  Mean   :0.5654                                     Mean   :1      
##  3rd Qu.:0.5657                                     3rd Qu.:1      
##  Max.   :1.0000                                     Max.   :1      
##  Equity.to.Liability
##  Min.   :0.00000    
##  1st Qu.:0.02448    
##  Median :0.03380    
##  Mean   :0.04758    
##  3rd Qu.:0.05284    
##  Max.   :1.00000
# Keep the previously loaded dataset under a descriptive alias for later use.
taiwanese_data <- data

# Bitcoin Heist Ransomware Address dataset **
# NOTE(review): readr is attached but base read.csv() is what is actually
# called; read_csv() would return a tibble (different downstream behavior),
# so the base reader is deliberately kept.
library(readr)
BitcoinHeistData.2 <- read.csv("~/Desktop/NCU/DissertationDatasets/BitcoinHeistData 2.csv")
# str() prints the structure as a side effect and returns NULL invisibly, so
# the original head(str(...)) only added a stray "NULL" to the output.
# Calling str() directly shows the same structure without the artifact.
str(BitcoinHeistData.2)
## 'data.frame':    2916697 obs. of  10 variables:
##  $ address  : chr  "111K8kZAEnJg245r2cM6y9zgJGHZtJPy6" "1123pJv8jzeFQaCV4w644pzQJzVWay2zcA" "112536im7hy6wtKbpH1qYDWtTyMRAcA2p7" "1126eDRw2wqSkWosjTCre8cjjQW8sSeWH7" ...
##  $ year     : int  2017 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
##  $ day      : int  11 132 246 322 238 96 225 324 298 62 ...
##  $ length   : int  18 44 0 72 144 144 142 78 144 112 ...
##  $ weight   : num  0.008333 0.000244 1 0.003906 0.072848 ...
##  $ count    : int  1 1 1 1 456 2821 881 1 4220 1 ...
##  $ looped   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ neighbors: int  2 1 2 2 1 1 2 2 2 1 ...
##  $ income   : num  1.00e+08 1.00e+08 2.00e+08 7.12e+07 2.00e+08 ...
##  $ label    : chr  "princetonCerber" "princetonLocky" "princetonCerber" "princetonCerber" ...
## NULL
 print(summary(BitcoinHeistData.2))  # explicit print of the per-column summaries
##    address               year           day            length      
##  Length:2916697     Min.   :2011   Min.   :  1.0   Min.   :  0.00  
##  Class :character   1st Qu.:2013   1st Qu.: 92.0   1st Qu.:  2.00  
##  Mode  :character   Median :2014   Median :181.0   Median :  8.00  
##                     Mean   :2014   Mean   :181.5   Mean   : 45.01  
##                     3rd Qu.:2016   3rd Qu.:271.0   3rd Qu.:108.00  
##                     Max.   :2018   Max.   :365.0   Max.   :144.00  
##      weight              count             looped          neighbors        
##  Min.   :   0.0000   Min.   :    1.0   Min.   :    0.0   Min.   :    1.000  
##  1st Qu.:   0.0215   1st Qu.:    1.0   1st Qu.:    0.0   1st Qu.:    1.000  
##  Median :   0.2500   Median :    1.0   Median :    0.0   Median :    2.000  
##  Mean   :   0.5455   Mean   :  721.6   Mean   :  238.5   Mean   :    2.207  
##  3rd Qu.:   0.8819   3rd Qu.:   56.0   3rd Qu.:    0.0   3rd Qu.:    2.000  
##  Max.   :1943.7488   Max.   :14497.0   Max.   :14496.0   Max.   :12920.000  
##      income             label          
##  Min.   :3.000e+07   Length:2916697    
##  1st Qu.:7.429e+07   Class :character  
##  Median :2.000e+08   Mode  :character  
##  Mean   :4.465e+09                     
##  3rd Qu.:9.940e+08                     
##  Max.   :4.996e+13
# RT-IoT2022 dataset **
library(readr)
# NOTE(review): the path has no file extension — confirm that
# "~/Downloads/RT_IOT2022" really is the CSV export expected by read.csv().
RT_IOT2022 <- read.csv("~/Downloads/RT_IOT2022")
# str() returns NULL invisibly after printing, so the original
# head(str(...)) just echoed an extra "NULL"; call str() directly.
str(RT_IOT2022)
## 'data.frame':    123117 obs. of  85 variables:
##  $ X                       : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ id.orig_p               : int  38667 51143 44761 60893 51087 48579 54063 33457 52181 53469 ...
##  $ id.resp_p               : int  1883 1883 1883 1883 1883 1883 1883 1883 1883 1883 ...
##  $ proto                   : chr  "tcp" "tcp" "tcp" "tcp" ...
##  $ service                 : chr  "mqtt" "mqtt" "mqtt" "mqtt" ...
##  $ flow_duration           : num  32 31.9 32.1 32 31.9 ...
##  $ fwd_pkts_tot            : int  9 9 9 9 9 9 9 9 9 9 ...
##  $ bwd_pkts_tot            : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ fwd_data_pkts_tot       : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ bwd_data_pkts_tot       : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ fwd_pkts_per_sec        : num  0.281 0.282 0.28 0.282 0.282 ...
##  $ bwd_pkts_per_sec        : num  0.156 0.157 0.156 0.156 0.157 ...
##  $ flow_pkts_per_sec       : num  0.437 0.439 0.436 0.438 0.439 ...
##  $ down_up_ratio           : num  0.556 0.556 0.556 0.556 0.556 ...
##  $ fwd_header_size_tot     : int  296 296 296 296 296 296 296 296 296 296 ...
##  $ fwd_header_size_min     : int  32 32 32 32 32 32 32 32 32 32 ...
##  $ fwd_header_size_max     : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ bwd_header_size_tot     : int  168 168 168 168 168 168 168 168 168 168 ...
##  $ bwd_header_size_min     : int  32 32 32 32 32 32 32 32 32 32 ...
##  $ bwd_header_size_max     : int  40 40 40 40 40 40 40 40 40 40 ...
##  $ flow_FIN_flag_count     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_SYN_flag_count     : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ flow_RST_flag_count     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ fwd_PSH_flag_count      : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ bwd_PSH_flag_count      : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ flow_ACK_flag_count     : int  13 13 13 13 13 13 13 13 13 13 ...
##  $ fwd_URG_flag_count      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_URG_flag_count      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_CWR_flag_count     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_ECE_flag_count     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_pkts_payload.min    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_pkts_payload.max    : num  33 33 33 33 33 33 33 33 33 33 ...
##  $ fwd_pkts_payload.tot    : num  76 76 74 74 76 76 76 76 76 76 ...
##  $ fwd_pkts_payload.avg    : num  8.44 8.44 8.22 8.22 8.44 ...
##  $ fwd_pkts_payload.std    : num  13.1 13.1 12.9 12.9 13.1 ...
##  $ bwd_pkts_payload.min    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_pkts_payload.max    : num  23 23 21 21 23 23 23 23 23 23 ...
##  $ bwd_pkts_payload.tot    : num  32 32 30 30 32 32 32 32 32 32 ...
##  $ bwd_pkts_payload.avg    : num  6.4 6.4 6 6 6.4 6.4 6.4 6.4 6.4 6.4 ...
##  $ bwd_pkts_payload.std    : num  9.56 9.56 8.69 8.69 9.56 ...
##  $ flow_pkts_payload.min   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_pkts_payload.max   : num  33 33 33 33 33 33 33 33 33 33 ...
##  $ flow_pkts_payload.tot   : num  108 108 104 104 108 108 108 108 108 108 ...
##  $ flow_pkts_payload.avg   : num  7.71 7.71 7.43 7.43 7.71 ...
##  $ flow_pkts_payload.std   : num  11.6 11.6 11.2 11.2 11.6 ...
##  $ fwd_iat.min             : num  762 247 284 289 388 ...
##  $ fwd_iat.max             : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ fwd_iat.tot             : num  32011598 31883584 32124053 31961063 31902362 ...
##  $ fwd_iat.avg             : num  4001450 3985448 4015507 3995133 3987795 ...
##  $ fwd_iat.std             : num  10403074 10463456 10442378 10482528 10447019 ...
##  $ bwd_iat.min             : num  4439 4214 2457 3934 3005 ...
##  $ bwd_iat.max             : num  1511694 1576436 1476049 1551892 1632083 ...
##  $ bwd_iat.tot             : num  2026391 1876261 2013770 1883784 1935984 ...
##  $ bwd_iat.avg             : num  506598 469065 503442 470946 483996 ...
##  $ bwd_iat.std             : num  680406 741352 660344 724569 768543 ...
##  $ flow_iat.min            : num  762 247 284 289 388 ...
##  $ flow_iat.max            : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ flow_iat.tot            : num  32011598 31883584 32124053 31961063 31902362 ...
##  $ flow_iat.avg            : num  2462431 2452583 2471081 2458543 2454028 ...
##  $ flow_iat.std            : num  8199747 8242459 8230593 8257786 8230584 ...
##  $ payload_bytes_per_second: num  3.37 3.39 3.24 3.25 3.39 ...
##  $ fwd_subflow_pkts        : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ bwd_subflow_pkts        : num  1.67 1.67 1.67 1.67 1.67 ...
##  $ fwd_subflow_bytes       : num  25.3 25.3 24.7 24.7 25.3 ...
##  $ bwd_subflow_bytes       : num  10.7 10.7 10 10 10.7 ...
##  $ fwd_bulk_bytes          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_bytes          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_bulk_packets        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_packets        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_bulk_rate           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_rate           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ active.min              : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.max              : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.tot              : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.avg              : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.std              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ idle.min                : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.max                : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.tot                : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.avg                : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.std                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_init_window_size    : int  64240 64240 64240 64240 64240 64240 64240 64240 64240 64240 ...
##  $ bwd_init_window_size    : int  26847 26847 26847 26847 26847 26847 26847 26847 26847 26847 ...
##  $ fwd_last_window_size    : int  502 502 502 502 502 502 502 502 502 502 ...
##  $ Attack_type             : chr  "MQTT_Publish" "MQTT_Publish" "MQTT_Publish" "MQTT_Publish" ...
## NULL
   print(summary(RT_IOT2022))  # explicit print of the per-column summaries
##        X           id.orig_p       id.resp_p        proto          
##  Min.   :    0   Min.   :    0   Min.   :    0   Length:123117     
##  1st Qu.: 6059   1st Qu.:17702   1st Qu.:   21   Class :character  
##  Median :33100   Median :37221   Median :   21   Mode  :character  
##  Mean   :37035   Mean   :34639   Mean   : 1014                     
##  3rd Qu.:63879   3rd Qu.:50971   3rd Qu.:   21                     
##  Max.   :94658   Max.   :65535   Max.   :65389                     
##    service          flow_duration       fwd_pkts_tot       bwd_pkts_tot     
##  Length:123117      Min.   :    0.00   Min.   :   0.000   Min.   :    0.00  
##  Class :character   1st Qu.:    0.00   1st Qu.:   1.000   1st Qu.:    1.00  
##  Mode  :character   Median :    0.00   Median :   1.000   Median :    1.00  
##                     Mean   :    3.81   Mean   :   2.269   Mean   :    1.91  
##                     3rd Qu.:    0.00   3rd Qu.:   1.000   3rd Qu.:    1.00  
##                     Max.   :21728.34   Max.   :4345.000   Max.   :10112.00  
##  fwd_data_pkts_tot  bwd_data_pkts_tot  fwd_pkts_per_sec    bwd_pkts_per_sec   
##  Min.   :   0.000   Min.   :    0.00   Min.   :      0.0   Min.   :      0.0  
##  1st Qu.:   1.000   1st Qu.:    0.00   1st Qu.:     74.5   1st Qu.:     72.9  
##  Median :   1.000   Median :    0.00   Median : 246723.8   Median : 246723.8  
##  Mean   :   1.471   Mean   :    0.82   Mean   : 351806.3   Mean   : 351762.0  
##  3rd Qu.:   1.000   3rd Qu.:    0.00   3rd Qu.: 524288.0   3rd Qu.: 524288.0  
##  Max.   :4345.000   Max.   :10105.00   Max.   :1048576.0   Max.   :1048576.0  
##  flow_pkts_per_sec   down_up_ratio    fwd_header_size_tot fwd_header_size_min
##  Min.   :      0.0   Min.   :0.0000   Min.   :    0.00    Min.   : 0.00      
##  1st Qu.:    149.1   1st Qu.:1.0000   1st Qu.:   20.00    1st Qu.:20.00      
##  Median : 493447.5   Median :1.0000   Median :   20.00    Median :20.00      
##  Mean   : 703568.3   Mean   :0.8546   Mean   :   53.89    Mean   :19.78      
##  3rd Qu.:1048576.0   3rd Qu.:1.0000   3rd Qu.:   20.00    3rd Qu.:20.00      
##  Max.   :2097152.0   Max.   :6.0879   Max.   :69296.00    Max.   :44.00      
##  fwd_header_size_max bwd_header_size_tot bwd_header_size_min
##  Min.   : 0.00       Min.   :     0.0    Min.   : 0.0       
##  1st Qu.:20.00       1st Qu.:    20.0    1st Qu.:20.0       
##  Median :20.00       Median :    20.0    Median :20.0       
##  Mean   :20.65       Mean   :    46.6    Mean   :17.7       
##  3rd Qu.:20.00       3rd Qu.:    20.0    3rd Qu.:20.0       
##  Max.   :52.00       Max.   :323592.0    Max.   :40.0       
##  bwd_header_size_max flow_FIN_flag_count flow_SYN_flag_count
##  Min.   : 0.00       Min.   : 0.0000     Min.   :0.0000     
##  1st Qu.:20.00       1st Qu.: 0.0000     1st Qu.:1.0000     
##  Median :20.00       Median : 0.0000     Median :1.0000     
##  Mean   :18.43       Mean   : 0.1156     Mean   :0.9509     
##  3rd Qu.:20.00       3rd Qu.: 0.0000     3rd Qu.:1.0000     
##  Max.   :44.00       Max.   :10.0000     Max.   :8.0000     
##  flow_RST_flag_count fwd_PSH_flag_count bwd_PSH_flag_count  flow_ACK_flag_count
##  Min.   : 0.0000     Min.   :  0.0000   Min.   :   0.0000   Min.   :    0.000  
##  1st Qu.: 1.0000     1st Qu.:  0.0000   1st Qu.:   0.0000   1st Qu.:    1.000  
##  Median : 1.0000     Median :  0.0000   Median :   0.0000   Median :    1.000  
##  Mean   : 0.7965     Mean   :  0.3513   Mean   :   0.3936   Mean   :    2.678  
##  3rd Qu.: 1.0000     3rd Qu.:  0.0000   3rd Qu.:   0.0000   3rd Qu.:    1.000  
##  Max.   :10.0000     Max.   :864.0000   Max.   :1446.0000   Max.   :11772.000  
##  fwd_URG_flag_count bwd_URG_flag_count flow_CWR_flag_count flow_ECE_flag_count
##  Min.   :0.00000    Min.   :0          Min.   :0.000000    Min.   :0.000000   
##  1st Qu.:0.00000    1st Qu.:0          1st Qu.:0.000000    1st Qu.:0.000000   
##  Median :0.00000    Median :0          Median :0.000000    Median :0.000000   
##  Mean   :0.01629    Mean   :0          Mean   :0.001007    Mean   :0.000699   
##  3rd Qu.:0.00000    3rd Qu.:0          3rd Qu.:0.000000    3rd Qu.:0.000000   
##  Max.   :1.00000    Max.   :0          Max.   :4.000000    Max.   :4.000000   
##  fwd_pkts_payload.min fwd_pkts_payload.max fwd_pkts_payload.tot
##  Min.   :   0.00      Min.   :   0.0       Min.   :     0.0    
##  1st Qu.: 120.00      1st Qu.: 120.0       1st Qu.:   120.0    
##  Median : 120.00      Median : 120.0       Median :   120.0    
##  Mean   :  96.26      Mean   : 120.7       Mean   :   221.5    
##  3rd Qu.: 120.00      3rd Qu.: 120.0       3rd Qu.:   120.0    
##  Max.   :1097.00      Max.   :1420.0       Max.   :747340.0    
##  fwd_pkts_payload.avg fwd_pkts_payload.std bwd_pkts_payload.min
##  Min.   :   0.0       Min.   :  0.000      Min.   :   0.000    
##  1st Qu.: 120.0       1st Qu.:  0.000      1st Qu.:   0.000    
##  Median : 120.0       Median :  0.000      Median :   0.000    
##  Mean   : 100.5       Mean   :  8.108      Mean   :   3.817    
##  3rd Qu.: 120.0       3rd Qu.:  0.000      3rd Qu.:   0.000    
##  Max.   :1319.4       Max.   :731.579      Max.   :1357.000    
##  bwd_pkts_payload.max bwd_pkts_payload.tot bwd_pkts_payload.avg
##  Min.   :   0.0       Min.   :       0     Min.   :   0.00     
##  1st Qu.:   0.0       1st Qu.:       0     1st Qu.:   0.00     
##  Median :   0.0       Median :       0     Median :   0.00     
##  Mean   :  52.4       Mean   :     513     Mean   :  18.79     
##  3rd Qu.:   0.0       3rd Qu.:       0     3rd Qu.:   0.00     
##  Max.   :5124.0       Max.   :13610415     Max.   :1457.05     
##  bwd_pkts_payload.std flow_pkts_payload.min flow_pkts_payload.max
##  Min.   :   0.00      Min.   :   0.00       Min.   :   0.0       
##  1st Qu.:   0.00      1st Qu.:   0.00       1st Qu.: 120.0       
##  Median :   0.00      Median :   0.00       Median : 120.0       
##  Mean   :  20.55      Mean   :  13.55       Mean   : 148.5       
##  3rd Qu.:   0.00      3rd Qu.:   0.00       3rd Qu.: 120.0       
##  Max.   :1506.01      Max.   :1097.00       Max.   :5124.0       
##  flow_pkts_payload.tot flow_pkts_payload.avg flow_pkts_payload.std
##  Min.   :       0      Min.   :   0.00       Min.   :  0.00       
##  1st Qu.:     120      1st Qu.:  60.00       1st Qu.: 50.22       
##  Median :     120      Median :  60.00       Median : 84.85       
##  Mean   :     735      Mean   :  65.01       Mean   : 76.04       
##  3rd Qu.:     120      3rd Qu.:  60.00       3rd Qu.: 84.85       
##  Max.   :13610585      Max.   :1156.08       Max.   :924.65       
##   fwd_iat.min         fwd_iat.max         fwd_iat.tot       
##  Min.   :        0   Min.   :        0   Min.   :0.000e+00  
##  1st Qu.:        0   1st Qu.:        0   1st Qu.:0.000e+00  
##  Median :        0   Median :        0   Median :0.000e+00  
##  Mean   :     8843   Mean   :  1721566   Mean   :3.780e+06  
##  3rd Qu.:        0   3rd Qu.:        0   3rd Qu.:0.000e+00  
##  Max.   :300252571   Max.   :300252571   Max.   :2.173e+10  
##   fwd_iat.avg         fwd_iat.std         bwd_iat.min        bwd_iat.max       
##  Min.   :        0   Min.   :        0   Min.   :       0   Min.   :        0  
##  1st Qu.:        0   1st Qu.:        0   1st Qu.:       0   1st Qu.:        0  
##  Median :        0   Median :        0   Median :       0   Median :        0  
##  Mean   :   237357   Mean   :   577557   Mean   :    3765   Mean   :   407727  
##  3rd Qu.:        0   3rd Qu.:        0   3rd Qu.:       0   3rd Qu.:        0  
##  Max.   :300252571   Max.   :212296532   Max.   :43196220   Max.   :300028179  
##   bwd_iat.tot         bwd_iat.avg         bwd_iat.std         flow_iat.min     
##  Min.   :0.000e+00   Min.   :        0   Min.   :        0   Min.   :       0  
##  1st Qu.:0.000e+00   1st Qu.:        0   1st Qu.:        0   1st Qu.:       1  
##  Median :0.000e+00   Median :        0   Median :        0   Median :       4  
##  Mean   :1.780e+06   Mean   :    87652   Mean   :   147480   Mean   :    4283  
##  3rd Qu.:0.000e+00   3rd Qu.:        0   3rd Qu.:        0   3rd Qu.:       5  
##  Max.   :1.876e+10   Max.   :150148934   Max.   :211961260   Max.   :43510042  
##   flow_iat.max        flow_iat.tot        flow_iat.avg       flow_iat.std      
##  Min.   :        0   Min.   :0.000e+00   Min.   :       0   Min.   :        0  
##  1st Qu.:        1   1st Qu.:1.000e+00   1st Qu.:       1   1st Qu.:        0  
##  Median :        4   Median :4.000e+00   Median :       4   Median :        0  
##  Mean   :  1725999   Mean   :3.811e+06   Mean   :  139654   Mean   :   450136  
##  3rd Qu.:        5   3rd Qu.:5.000e+00   3rd Qu.:       5   3rd Qu.:        0  
##  Max.   :299999988   Max.   :2.173e+10   Max.   :72835758   Max.   :134122073  
##  payload_bytes_per_second fwd_subflow_pkts  bwd_subflow_pkts  
##  Min.   :        0        Min.   :  0.000   Min.   :   0.000  
##  1st Qu.:     2581        1st Qu.:  1.000   1st Qu.:   1.000  
##  Median : 29606852        Median :  1.000   Median :   1.000  
##  Mean   : 41053452        Mean   :  1.552   Mean   :   1.338  
##  3rd Qu.: 55924053        3rd Qu.:  1.000   3rd Qu.:   1.000  
##  Max.   :125829120        Max.   :276.833   Max.   :1685.333  
##  fwd_subflow_bytes bwd_subflow_bytes   fwd_bulk_bytes     bwd_bulk_bytes   
##  Min.   :    0.0   Min.   :      0.0   Min.   :     0.0   Min.   :      0  
##  1st Qu.:  120.0   1st Qu.:      0.0   1st Qu.:     0.0   1st Qu.:      0  
##  Median :  120.0   Median :      0.0   Median :     0.0   Median :      0  
##  Mean   :  136.5   Mean   :    217.5   Mean   :    19.2   Mean   :    155  
##  3rd Qu.:  120.0   3rd Qu.:      0.0   3rd Qu.:     0.0   3rd Qu.:      0  
##  Max.   :52067.8   Max.   :2268402.5   Max.   :465095.0   Max.   :6805208  
##  fwd_bulk_packets   bwd_bulk_packets   fwd_bulk_rate      bwd_bulk_rate     
##  Min.   :  0.0000   Min.   :   0.000   Min.   :       0   Min.   :       0  
##  1st Qu.:  0.0000   1st Qu.:   0.000   1st Qu.:       0   1st Qu.:       0  
##  Median :  0.0000   Median :   0.000   Median :       0   Median :       0  
##  Mean   :  0.0241   Mean   :   0.131   Mean   :    3836   Mean   :   48415  
##  3rd Qu.:  0.0000   3rd Qu.:   0.000   3rd Qu.:       0   3rd Qu.:       0  
##  Max.   :343.0000   Max.   :5052.500   Max.   :46336283   Max.   :28300874  
##    active.min          active.max          active.tot       
##  Min.   :        0   Min.   :        0   Min.   :0.000e+00  
##  1st Qu.:        1   1st Qu.:        1   1st Qu.:1.000e+00  
##  Median :        4   Median :        4   Median :4.000e+00  
##  Mean   :   133155   Mean   :   178590   Mean   :2.929e+05  
##  3rd Qu.:        5   3rd Qu.:        5   3rd Qu.:5.000e+00  
##  Max.   :312507974   Max.   :848097909   Max.   :2.945e+09  
##    active.avg          active.std           idle.min        
##  Min.   :        0   Min.   :        0   Min.   :        0  
##  1st Qu.:        1   1st Qu.:        0   1st Qu.:        0  
##  Median :        4   Median :        0   Median :        0  
##  Mean   :   148135   Mean   :    23536   Mean   :  1616655  
##  3rd Qu.:        5   3rd Qu.:        0   3rd Qu.:        0  
##  Max.   :437493062   Max.   :477486236   Max.   :299999988  
##     idle.max            idle.tot            idle.avg        
##  Min.   :        0   Min.   :0.000e+00   Min.   :        0  
##  1st Qu.:        0   1st Qu.:0.000e+00   1st Qu.:        0  
##  Median :        0   Median :0.000e+00   Median :        0  
##  Mean   :  1701956   Mean   :3.518e+06   Mean   :  1664985  
##  3rd Qu.:        0   3rd Qu.:0.000e+00   3rd Qu.:        0  
##  Max.   :299999988   Max.   :2.097e+10   Max.   :299999988  
##     idle.std         fwd_init_window_size bwd_init_window_size
##  Min.   :        0   Min.   :    0        Min.   :    0       
##  1st Qu.:        0   1st Qu.:   64        1st Qu.:    0       
##  Median :        0   Median :   64        Median :    0       
##  Mean   :    45502   Mean   : 6119        Mean   : 2740       
##  3rd Qu.:        0   3rd Qu.:   64        3rd Qu.:    0       
##  Max.   :120802871   Max.   :65535        Max.   :65535       
##  fwd_last_window_size Attack_type       
##  Min.   :    0.0      Length:123117     
##  1st Qu.:   64.0      Class :character  
##  Median :   64.0      Mode  :character  
##  Mean   :  751.6                        
##  3rd Qu.:   64.0                        
##  Max.   :65535.0
# PhiUSIIL Phishing URL Dataset
library(readr)
# PhiUSIIL phishing URL dataset (235,795 rows, 56 variables incl. binary label).
PhiUSIIL_Phishing_URL_Dataset <- read.csv("~/Desktop/NCU/DissertationDatasets/PhiUSIIL_Phishing_URL_Dataset.csv")
# str() prints the structure as a side effect and returns NULL invisibly;
# wrapping it in head() only added a spurious "NULL" line to the knitted output.
str(PhiUSIIL_Phishing_URL_Dataset)
## 'data.frame':    235795 obs. of  56 variables:
##  $ FILENAME                  : chr  "521848.txt" "31372.txt" "597387.txt" "554095.txt" ...
##  $ URL                       : chr  "https://www.southbankmosaics.com" "https://www.uni-mainz.de" "https://www.voicefmradio.co.uk" "https://www.sfnmjournal.com" ...
##  $ URLLength                 : int  31 23 29 26 33 30 25 25 29 18 ...
##  $ Domain                    : chr  "www.southbankmosaics.com" "www.uni-mainz.de" "www.voicefmradio.co.uk" "www.sfnmjournal.com" ...
##  $ DomainLength              : int  24 16 22 19 26 23 18 18 22 11 ...
##  $ IsDomainIP                : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ TLD                       : chr  "com" "de" "uk" "com" ...
##  $ URLSimilarityIndex        : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ CharContinuationRate      : num  1 0.667 0.867 1 1 ...
##  $ TLDLegitimateProb         : num  0.5229 0.0327 0.0286 0.5229 0.08 ...
##  $ URLCharProb               : num  0.0619 0.0502 0.0641 0.0576 0.0594 ...
##  $ TLDLength                 : int  3 2 2 3 3 3 3 3 2 3 ...
##  $ NoOfSubDomain             : int  1 1 2 1 1 1 1 1 1 1 ...
##  $ HasObfuscation            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfObfuscatedChar        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ObfuscationRatio          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfLettersInURL          : int  18 9 15 13 20 17 12 12 16 5 ...
##  $ LetterRatioInURL          : num  0.581 0.391 0.517 0.5 0.606 0.567 0.48 0.48 0.552 0.278 ...
##  $ NoOfDegitsInURL           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ DegitRatioInURL           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfEqualsInURL           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfQMarkInURL            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfAmpersandInURL        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfOtherSpecialCharsInURL: int  1 2 2 1 1 1 1 1 1 1 ...
##  $ SpacialCharRatioInURL     : num  0.032 0.087 0.069 0.038 0.03 0.033 0.04 0.04 0.034 0.056 ...
##  $ IsHTTPS                   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LineOfCode                : int  558 618 467 6356 6089 1210 1024 514 2371 2730 ...
##  $ LargestLineLength         : int  9381 9381 682 26824 28404 737 984 399 12913 481 ...
##  $ HasTitle                  : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Title                     : chr  "ข่าวสด ข่าววันนี้ ข่าวà¸\u0081ีฬา ข่าวบันเทิà"| __truncated__ "johannes gutenberg-universität mainz" "voice fm southampton" "home page: seminars in fetal and neonatal medicine " ...
##  $ DomainTitleMatchScore     : num  0 55.6 46.7 0 100 ...
##  $ URLTitleMatchScore        : num  0 55.6 46.7 0 100 ...
##  $ HasFavicon                : int  0 1 0 0 0 0 1 1 0 1 ...
##  $ Robots                    : int  1 1 1 1 1 0 0 0 1 1 ...
##  $ IsResponsive              : int  1 0 1 1 1 1 1 1 1 1 ...
##  $ NoOfURLRedirect           : int  0 0 0 0 1 0 1 0 0 0 ...
##  $ NoOfSelfRedirect          : int  0 0 0 0 1 0 1 0 0 0 ...
##  $ HasDescription            : int  0 0 1 0 1 1 0 1 1 1 ...
##  $ NoOfPopup                 : int  0 0 0 1 0 1 2 0 0 0 ...
##  $ NoOfiFrame                : int  1 0 0 12 2 1 4 1 0 2 ...
##  $ HasExternalFormSubmit     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ HasSocialNet              : int  0 1 0 1 1 1 1 1 1 1 ...
##  $ HasSubmitButton           : int  1 1 1 1 1 0 0 1 1 0 ...
##  $ HasHiddenFields           : int  1 0 1 1 1 1 1 0 1 0 ...
##  $ HasPasswordField          : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ Bank                      : int  1 0 0 0 1 0 0 0 0 0 ...
##  $ Pay                       : int  0 0 0 1 1 0 0 0 0 0 ...
##  $ Crypto                    : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ HasCopyrightInfo          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ NoOfImage                 : int  34 50 10 3 244 35 32 24 71 10 ...
##  $ NoOfCSS                   : int  20 9 2 27 15 1 4 2 4 1 ...
##  $ NoOfJS                    : int  28 8 7 15 34 11 14 22 9 12 ...
##  $ NoOfSelfRef               : int  119 39 42 22 72 86 44 36 40 173 ...
##  $ NoOfEmptyRef              : int  0 0 2 1 1 0 2 0 1 6 ...
##  $ NoOfExternalRef           : int  124 217 5 31 85 14 17 15 317 65 ...
##  $ label                     : int  1 1 1 1 1 1 1 1 1 1 ...
## NULL
   summary(PhiUSIIL_Phishing_URL_Dataset)
##    FILENAME             URL              URLLength          Domain         
##  Length:235795      Length:235795      Min.   :  13.00   Length:235795     
##  Class :character   Class :character   1st Qu.:  23.00   Class :character  
##  Mode  :character   Mode  :character   Median :  27.00   Mode  :character  
##                                        Mean   :  34.57                     
##                                        3rd Qu.:  34.00                     
##                                        Max.   :6097.00                     
##   DomainLength      IsDomainIP           TLD            URLSimilarityIndex
##  Min.   :  4.00   Min.   :0.000000   Length:235795      Min.   :  0.1556  
##  1st Qu.: 16.00   1st Qu.:0.000000   Class :character   1st Qu.: 57.0248  
##  Median : 20.00   Median :0.000000   Mode  :character   Median :100.0000  
##  Mean   : 21.47   Mean   :0.002706                      Mean   : 78.4308  
##  3rd Qu.: 24.00   3rd Qu.:0.000000                      3rd Qu.:100.0000  
##  Max.   :110.00   Max.   :1.000000                      Max.   :100.0000  
##  CharContinuationRate TLDLegitimateProb   URLCharProb         TLDLength     
##  Min.   :0.0000       Min.   :0.000000   Min.   :0.001083   Min.   : 2.000  
##  1st Qu.:0.6800       1st Qu.:0.005977   1st Qu.:0.050747   1st Qu.: 2.000  
##  Median :1.0000       Median :0.079963   Median :0.057970   Median : 3.000  
##  Mean   :0.8455       Mean   :0.260423   Mean   :0.055747   Mean   : 2.764  
##  3rd Qu.:1.0000       3rd Qu.:0.522907   3rd Qu.:0.062875   3rd Qu.: 3.000  
##  Max.   :1.0000       Max.   :0.522907   Max.   :0.090824   Max.   :13.000  
##  NoOfSubDomain    HasObfuscation     NoOfObfuscatedChar ObfuscationRatio   
##  Min.   : 0.000   Min.   :0.000000   Min.   :  0.0000   Min.   :0.0000000  
##  1st Qu.: 1.000   1st Qu.:0.000000   1st Qu.:  0.0000   1st Qu.:0.0000000  
##  Median : 1.000   Median :0.000000   Median :  0.0000   Median :0.0000000  
##  Mean   : 1.165   Mean   :0.002057   Mean   :  0.0249   Mean   :0.0001384  
##  3rd Qu.: 1.000   3rd Qu.:0.000000   3rd Qu.:  0.0000   3rd Qu.:0.0000000  
##  Max.   :10.000   Max.   :1.000000   Max.   :447.0000   Max.   :0.3480000  
##  NoOfLettersInURL  LetterRatioInURL NoOfDegitsInURL    DegitRatioInURL  
##  Min.   :   0.00   Min.   :0.0000   Min.   :   0.000   Min.   :0.00000  
##  1st Qu.:  10.00   1st Qu.:0.4350   1st Qu.:   0.000   1st Qu.:0.00000  
##  Median :  14.00   Median :0.5190   Median :   0.000   Median :0.00000  
##  Mean   :  19.43   Mean   :0.5159   Mean   :   1.881   Mean   :0.02862  
##  3rd Qu.:  20.00   3rd Qu.:0.5940   3rd Qu.:   0.000   3rd Qu.:0.00000  
##  Max.   :5191.00   Max.   :0.9260   Max.   :2011.000   Max.   :0.68400  
##  NoOfEqualsInURL     NoOfQMarkInURL   NoOfAmpersandInURL 
##  Min.   :  0.00000   Min.   :0.0000   Min.   :  0.00000  
##  1st Qu.:  0.00000   1st Qu.:0.0000   1st Qu.:  0.00000  
##  Median :  0.00000   Median :0.0000   Median :  0.00000  
##  Mean   :  0.06224   Mean   :0.0294   Mean   :  0.02506  
##  3rd Qu.:  0.00000   3rd Qu.:0.0000   3rd Qu.:  0.00000  
##  Max.   :176.00000   Max.   :4.0000   Max.   :149.00000  
##  NoOfOtherSpecialCharsInURL SpacialCharRatioInURL    IsHTTPS      
##  Min.   :  0.00             Min.   :0.00000       Min.   :0.0000  
##  1st Qu.:  1.00             1st Qu.:0.03800       1st Qu.:1.0000  
##  Median :  1.00             Median :0.05000       Median :1.0000  
##  Mean   :  2.34             Mean   :0.06331       Mean   :0.7826  
##  3rd Qu.:  3.00             3rd Qu.:0.08300       3rd Qu.:1.0000  
##  Max.   :499.00             Max.   :0.39700       Max.   :1.0000  
##    LineOfCode     LargestLineLength     HasTitle         Title          
##  Min.   :     2   Min.   :      22   Min.   :0.0000   Length:235795     
##  1st Qu.:    18   1st Qu.:     200   1st Qu.:1.0000   Class :character  
##  Median :   429   Median :    1090   Median :1.0000   Mode  :character  
##  Mean   :  1142   Mean   :   12790   Mean   :0.8613                     
##  3rd Qu.:  1277   3rd Qu.:    8047   3rd Qu.:1.0000                     
##  Max.   :442666   Max.   :13975732   Max.   :1.0000                     
##  DomainTitleMatchScore URLTitleMatchScore   HasFavicon         Robots      
##  Min.   :  0.00        Min.   :  0.00     Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:  0.00        1st Qu.:  0.00     1st Qu.:0.0000   1st Qu.:0.0000  
##  Median : 75.00        Median :100.00     Median :0.0000   Median :0.0000  
##  Mean   : 50.13        Mean   : 52.12     Mean   :0.3618   Mean   :0.2665  
##  3rd Qu.:100.00        3rd Qu.:100.00     3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :100.00        Max.   :100.00     Max.   :1.0000   Max.   :1.0000  
##   IsResponsive    NoOfURLRedirect  NoOfSelfRedirect  HasDescription  
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.0000  
##  Median :1.0000   Median :0.0000   Median :0.00000   Median :0.0000  
##  Mean   :0.6245   Mean   :0.1334   Mean   :0.04011   Mean   :0.4402  
##  3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.0000  
##    NoOfPopup          NoOfiFrame       HasExternalFormSubmit  HasSocialNet   
##  Min.   :  0.0000   Min.   :   0.000   Min.   :0.00000       Min.   :0.0000  
##  1st Qu.:  0.0000   1st Qu.:   0.000   1st Qu.:0.00000       1st Qu.:0.0000  
##  Median :  0.0000   Median :   0.000   Median :0.00000       Median :0.0000  
##  Mean   :  0.2218   Mean   :   1.589   Mean   :0.04399       Mean   :0.4566  
##  3rd Qu.:  0.0000   3rd Qu.:   1.000   3rd Qu.:0.00000       3rd Qu.:1.0000  
##  Max.   :602.0000   Max.   :1602.000   Max.   :1.00000       Max.   :1.0000  
##  HasSubmitButton  HasHiddenFields  HasPasswordField      Bank       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.4143   Mean   :0.3778   Mean   :0.1023   Mean   :0.1271  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##       Pay            Crypto        HasCopyrightInfo   NoOfImage      
##  Min.   :0.000   Min.   :0.00000   Min.   :0.0000   Min.   :   0.00  
##  1st Qu.:0.000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:   0.00  
##  Median :0.000   Median :0.00000   Median :0.0000   Median :   8.00  
##  Mean   :0.237   Mean   :0.02347   Mean   :0.4868   Mean   :  26.08  
##  3rd Qu.:0.000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:  29.00  
##  Max.   :1.000   Max.   :1.00000   Max.   :1.0000   Max.   :8956.00  
##     NoOfCSS             NoOfJS         NoOfSelfRef        NoOfEmptyRef     
##  Min.   :    0.00   Min.   :   0.00   Min.   :    0.00   Min.   :   0.000  
##  1st Qu.:    0.00   1st Qu.:   0.00   1st Qu.:    0.00   1st Qu.:   0.000  
##  Median :    2.00   Median :   6.00   Median :   12.00   Median :   0.000  
##  Mean   :    6.33   Mean   :  10.52   Mean   :   65.07   Mean   :   2.378  
##  3rd Qu.:    8.00   3rd Qu.:  15.00   3rd Qu.:   88.00   3rd Qu.:   1.000  
##  Max.   :35820.00   Max.   :6957.00   Max.   :27397.00   Max.   :4887.000  
##  NoOfExternalRef        label       
##  Min.   :    0.00   Min.   :0.0000  
##  1st Qu.:    1.00   1st Qu.:0.0000  
##  Median :   10.00   Median :1.0000  
##  Mean   :   49.26   Mean   :0.5719  
##  3rd Qu.:   57.00   3rd Qu.:1.0000  
##  Max.   :27516.00   Max.   :1.0000
# In-vehicle coupon recommendation dataset
library(readr)
# In-vehicle coupon recommendation dataset (12,684 rows, 26 variables, target Y).
in.vehicle.coupon.recommendation <- read.csv("~/Desktop/NCU/DissertationDatasets/in-vehicle-coupon-recommendation.csv", header=TRUE)
# str() already prints the structure; head() around its invisible NULL return
# only added a spurious "NULL" line to the knitted output.
str(in.vehicle.coupon.recommendation)
## 'data.frame':    12684 obs. of  26 variables:
##  $ destination         : chr  "No Urgent Place" "No Urgent Place" "No Urgent Place" "No Urgent Place" ...
##  $ passanger           : chr  "Alone" "Friend(s)" "Friend(s)" "Friend(s)" ...
##  $ weather             : chr  "Sunny" "Sunny" "Sunny" "Sunny" ...
##  $ temperature         : int  55 80 80 80 80 80 55 80 80 80 ...
##  $ time                : chr  "2PM" "10AM" "10AM" "2PM" ...
##  $ coupon              : chr  "Restaurant(<20)" "Coffee House" "Carry out & Take away" "Coffee House" ...
##  $ expiration          : chr  "1d" "2h" "2h" "2h" ...
##  $ gender              : chr  "Female" "Female" "Female" "Female" ...
##  $ age                 : chr  "21" "21" "21" "21" ...
##  $ maritalStatus       : chr  "Unmarried partner" "Unmarried partner" "Unmarried partner" "Unmarried partner" ...
##  $ has_children        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ education           : chr  "Some college - no degree" "Some college - no degree" "Some college - no degree" "Some college - no degree" ...
##  $ occupation          : chr  "Unemployed" "Unemployed" "Unemployed" "Unemployed" ...
##  $ income              : chr  "$37500 - $49999" "$37500 - $49999" "$37500 - $49999" "$37500 - $49999" ...
##  $ car                 : chr  "" "" "" "" ...
##  $ Bar                 : chr  "never" "never" "never" "never" ...
##  $ CoffeeHouse         : chr  "never" "never" "never" "never" ...
##  $ CarryAway           : chr  "" "" "" "" ...
##  $ RestaurantLessThan20: chr  "4~8" "4~8" "4~8" "4~8" ...
##  $ Restaurant20To50    : chr  "1~3" "1~3" "1~3" "1~3" ...
##  $ toCoupon_GEQ5min    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ toCoupon_GEQ15min   : int  0 0 1 1 1 1 1 1 1 1 ...
##  $ toCoupon_GEQ25min   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ direction_same      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ direction_opp       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Y                   : int  1 0 1 0 0 1 1 1 1 0 ...
## NULL
  summary(in.vehicle.coupon.recommendation)
##  destination         passanger           weather           temperature  
##  Length:12684       Length:12684       Length:12684       Min.   :30.0  
##  Class :character   Class :character   Class :character   1st Qu.:55.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :80.0  
##                                                           Mean   :63.3  
##                                                           3rd Qu.:80.0  
##                                                           Max.   :80.0  
##      time              coupon           expiration           gender         
##  Length:12684       Length:12684       Length:12684       Length:12684      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      age            maritalStatus       has_children     education        
##  Length:12684       Length:12684       Min.   :0.0000   Length:12684      
##  Class :character   Class :character   1st Qu.:0.0000   Class :character  
##  Mode  :character   Mode  :character   Median :0.0000   Mode  :character  
##                                        Mean   :0.4141                     
##                                        3rd Qu.:1.0000                     
##                                        Max.   :1.0000                     
##   occupation           income              car                Bar           
##  Length:12684       Length:12684       Length:12684       Length:12684      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  CoffeeHouse         CarryAway         RestaurantLessThan20 Restaurant20To50  
##  Length:12684       Length:12684       Length:12684         Length:12684      
##  Class :character   Class :character   Class :character     Class :character  
##  Mode  :character   Mode  :character   Mode  :character     Mode  :character  
##                                                                               
##                                                                               
##                                                                               
##  toCoupon_GEQ5min toCoupon_GEQ15min toCoupon_GEQ25min direction_same  
##  Min.   :1        Min.   :0.0000    Min.   :0.0000    Min.   :0.0000  
##  1st Qu.:1        1st Qu.:0.0000    1st Qu.:0.0000    1st Qu.:0.0000  
##  Median :1        Median :1.0000    Median :0.0000    Median :0.0000  
##  Mean   :1        Mean   :0.5615    Mean   :0.1191    Mean   :0.2148  
##  3rd Qu.:1        3rd Qu.:1.0000    3rd Qu.:0.0000    3rd Qu.:0.0000  
##  Max.   :1        Max.   :1.0000    Max.   :1.0000    Max.   :1.0000  
##  direction_opp          Y         
##  Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:1.0000   1st Qu.:0.0000  
##  Median :1.0000   Median :1.0000  
##  Mean   :0.7852   Mean   :0.5684  
##  3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000
# HTRU_2 pulsar candidate dataset (17,898 rows, 8 features + class in V9).
# NOTE: the original line had library(readr) fused into the comment
# ("#HTRU_2library(readr)"), which silently skipped loading the package;
# restored here to match the other dataset-loading chunks.
library(readr)
HTRU_2 <- read.csv("~/Downloads/htru2/HTRU_2.csv", header=FALSE)
# str() prints the structure; the head() wrapper only printed a spurious NULL.
str(HTRU_2)
## 'data.frame':    17898 obs. of  9 variables:
##  $ V1: num  140.6 102.5 103 136.8 88.7 ...
##  $ V2: num  55.7 58.9 39.3 57.2 40.7 ...
##  $ V3: num  -0.2346 0.4653 0.3233 -0.0684 0.6009 ...
##  $ V4: num  -0.7 -0.515 1.051 -0.636 1.123 ...
##  $ V5: num  3.2 1.68 3.12 3.64 1.18 ...
##  $ V6: num  19.1 14.9 21.7 21 11.5 ...
##  $ V7: num  7.98 10.58 7.74 6.9 14.27 ...
##  $ V8: num  74.2 127.4 63.2 53.6 252.6 ...
##  $ V9: int  0 0 0 0 0 0 0 0 0 0 ...
## NULL
   summary(HTRU_2)
##        V1                V2              V3                V4         
##  Min.   :  5.812   Min.   :24.77   Min.   :-1.8760   Min.   :-1.7919  
##  1st Qu.:100.930   1st Qu.:42.38   1st Qu.: 0.0271   1st Qu.:-0.1886  
##  Median :115.078   Median :46.95   Median : 0.2232   Median : 0.1987  
##  Mean   :111.080   Mean   :46.55   Mean   : 0.4779   Mean   : 1.7703  
##  3rd Qu.:127.086   3rd Qu.:51.02   3rd Qu.: 0.4733   3rd Qu.: 0.9278  
##  Max.   :192.617   Max.   :98.78   Max.   : 8.0695   Max.   :68.1016  
##        V5                 V6               V7               V8          
##  Min.   :  0.2132   Min.   :  7.37   Min.   :-3.139   Min.   :  -1.977  
##  1st Qu.:  1.9231   1st Qu.: 14.44   1st Qu.: 5.782   1st Qu.:  34.961  
##  Median :  2.8018   Median : 18.46   Median : 8.434   Median :  83.065  
##  Mean   : 12.6144   Mean   : 26.33   Mean   : 8.304   Mean   : 104.858  
##  3rd Qu.:  5.4643   3rd Qu.: 28.43   3rd Qu.:10.703   3rd Qu.: 139.309  
##  Max.   :223.3921   Max.   :110.64   Max.   :34.540   Max.   :1191.001  
##        V9         
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.09157  
##  3rd Qu.:0.00000  
##  Max.   :1.00000
# Internet Firewall dataset (65,532 traffic log rows, 12 variables, target Action).
log2 <- read.csv("~/Desktop/NCU/DissertationDataSets2/log2.csv")
# str() prints the structure; the head() wrapper only printed a spurious NULL.
str(log2)
## 'data.frame':    65532 obs. of  12 variables:
##  $ Source.Port         : int  57222 56258 6881 50553 50002 51465 60513 50049 52244 50627 ...
##  $ Destination.Port    : int  53 3389 50321 3389 443 443 47094 443 58774 443 ...
##  $ NAT.Source.Port     : int  54587 56258 43265 50553 45848 39975 45469 21285 2211 16215 ...
##  $ NAT.Destination.Port: int  53 3389 50321 3389 443 443 47094 443 58774 443 ...
##  $ Action              : chr  "allow" "allow" "allow" "allow" ...
##  $ Bytes               : int  177 4768 238 3327 25358 3961 320 7912 70 8256 ...
##  $ Bytes.Sent          : int  94 1600 118 1438 6778 1595 140 3269 70 1674 ...
##  $ Bytes.Received      : int  83 3168 120 1889 18580 2366 180 4643 0 6582 ...
##  $ Packets             : int  2 19 2 15 31 21 6 23 1 31 ...
##  $ Elapsed.Time..sec.  : int  30 17 1199 17 16 16 7 96 5 75 ...
##  $ pkts_sent           : int  1 10 1 8 13 12 3 12 1 15 ...
##  $ pkts_received       : int  1 9 1 7 18 9 3 11 0 16 ...
## NULL
   summary(log2)
##   Source.Port    Destination.Port NAT.Source.Port NAT.Destination.Port
##  Min.   :    0   Min.   :    0    Min.   :    0   Min.   :    0       
##  1st Qu.:49183   1st Qu.:   80    1st Qu.:    0   1st Qu.:    0       
##  Median :53776   Median :  445    Median : 8820   Median :   53       
##  Mean   :49392   Mean   :10577    Mean   :19283   Mean   : 2671       
##  3rd Qu.:58638   3rd Qu.:15000    3rd Qu.:38366   3rd Qu.:  443       
##  Max.   :65534   Max.   :65535    Max.   :65535   Max.   :65535       
##     Action              Bytes             Bytes.Sent        Bytes.Received     
##  Length:65532       Min.   :6.000e+01   Min.   :       60   Min.   :        0  
##  Class :character   1st Qu.:6.600e+01   1st Qu.:       66   1st Qu.:        0  
##  Mode  :character   Median :1.680e+02   Median :       90   Median :       79  
##                     Mean   :9.712e+04   Mean   :    22386   Mean   :    74738  
##                     3rd Qu.:7.520e+02   3rd Qu.:      210   3rd Qu.:      449  
##                     Max.   :1.269e+09   Max.   :948477220   Max.   :320881795  
##     Packets          Elapsed.Time..sec.   pkts_sent        pkts_received     
##  Min.   :      1.0   Min.   :    0.00   Min.   :     1.0   Min.   :     0.0  
##  1st Qu.:      1.0   1st Qu.:    0.00   1st Qu.:     1.0   1st Qu.:     0.0  
##  Median :      2.0   Median :   15.00   Median :     1.0   Median :     1.0  
##  Mean   :    102.9   Mean   :   65.83   Mean   :    41.4   Mean   :    61.5  
##  3rd Qu.:      6.0   3rd Qu.:   30.00   3rd Qu.:     3.0   3rd Qu.:     2.0  
##  Max.   :1036116.0   Max.   :10824.00   Max.   :747520.0   Max.   :327208.0
## Add Bayesian test functions (sign test, signed-rank test, correlated t-test)

#create function to conduct the Bayesian Sign Test
# Bayesian sign test (after Benavoli et al.): given a vector of paired
# differences between two classifiers (e.g. fold-by-fold accuracy differences),
# estimate the probability that classifier A is worse than B (left of the ROPE),
# practically equivalent (inside the ROPE), or better (right of the ROPE).
#
# diffVector : numeric vector of paired differences.
# rope_min, rope_max : lower/upper bounds of the region of practical equivalence.
#
# Returns a list with probLeft, probRope, probRight, which sum to 1.
#
# Fixes vs. the original:
#  - removed the unused library(MCMCpack), `samples`, and `weights` locals
#    (no Dirichlet sampling is performed in this point-estimate version);
#  - the ROPE is now closed ([rope_min, rope_max]) so a difference falling
#    exactly on a boundary is no longer dropped from all three categories,
#    guaranteeing the probabilities sum to 1;
#  - added the rope_max >= rope_min validation used by the sibling
#    correlatedBayesianTtest function.
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  # Add the fake first observation at 0 (prior pseudo-observation).
  diffVector <- c(0, diffVector)

  # Classify each difference into left / rope / right and take frequencies.
  probLeft <- mean(diffVector < rope_min)
  probRope <- mean(diffVector >= rope_min & diffVector <= rope_max)
  probRight <- mean(diffVector > rope_max)

  list("probLeft" = probLeft, "probRope" = probRope, "probRight" = probRight)
}


##Create function to conduct Bayesian Signed Rank Test

# Bayesian signed-rank test (after Benavoli et al.): Monte-Carlo estimate of
# the posterior probability that the pseudo-median of the paired differences
# lies left of, inside, or right of the ROPE [rope_min, rope_max].
# Returns a list with winLeft, winRope, winRight (posterior win frequencies).
# NOTE(review): requires MCMCpack for rdirichlet(); cost is
# O(samples * length(diffVector)^2), which is heavy for long vectors.
BayesianSignedRank <- function(diffVector,rope_min,rope_max) {
  
  library(MCMCpack)  # provides rdirichlet() for the Dirichlet posterior draws
  
  samples <- 30000  # number of Monte-Carlo draws from the posterior
  
  # Posterior Dirichlet weights: 0.5 for the prior pseudo-observation,
  # 1 for each real observation -> vector 0.5 1 1 ....... 1
  weights <- c(0.5,rep(1,length(diffVector)))
  
  # Add the fake first observation at 0 (the prior pseudo-observation)
  diffVector <- c (0, diffVector)
  
  # One row per draw; each row is a probability vector over the observations.
  sampledWeights <- rdirichlet(samples,weights)
  
  winLeft <- vector(length = samples)
  winRope <- vector(length = samples)
  winRight <- vector(length = samples)
  
  for (rep in 1:samples){
    currentWeights <- sampledWeights[rep,]
    # Accumulate the posterior mass of all Walsh averages
    # (diffVector[i]+diffVector[j])/2 falling right of / inside / left of
    # the ROPE, weighted by the product of the sampled weights.
    for (i in 1:length(currentWeights)){
      for (j in 1:length(currentWeights)){
        product= currentWeights[i] * currentWeights[j]
        if (diffVector[i]+diffVector[j] > (2*rope_max) ) {
          winRight[rep] <- winRight[rep] + product
        }
        else if (diffVector[i]+diffVector[j] > (2*rope_min) ) {
          winRope[rep] <- winRope[rep] + product
        }
        else {
          winLeft[rep] <- winLeft[rep] + product
        }

      }
    }
    # Convert the three masses into a (possibly tied) winner indicator for
    # this draw; ties split the unit of credit equally among the winners.
    # NOTE(review): ties are detected via exact float equality with the max,
    # so accumulation order matters — do not reorder these sums.
    maxWins=max(winRight[rep],winRope[rep],winLeft[rep])
    winners = (winRight[rep]==maxWins)*1 + (winRope[rep]==maxWins)*1 + (winLeft[rep]==maxWins)*1
    winRight[rep] <- (winRight[rep]==maxWins)*1/winners
    winRope[rep] <- (winRope[rep]==maxWins)*1/winners
    winLeft[rep] <- (winLeft[rep]==maxWins)*1/winners
  }
  
  
  # Posterior probability of each outcome = mean win frequency over draws.
  results = list ("winLeft"=mean(winLeft), "winRope"=mean(winRope),
                  "winRight"=mean(winRight) )
  return (results)
  
}


#Create function to conduct the Bayesian Correlated t.test

#diff_a_b is a vector of differences between the two classifiers, on each fold of cross-validation.
#If you have done 10 runs of 10-folds cross-validation, you have 100 results for each classifier.
#You should have run cross-validation on the same folds for the two classifiers.
#Then diff_a_b is the difference fold-by-fold.

#rho is the correlation of the cross-validation results: 1/(number of folds)
#rope_min and rope_max are the lower and the upper bound of the rope
 
# Bayesian correlated t-test (Corani & Benavoli) for comparing two classifiers
# evaluated on the same cross-validation folds.
#
# diff_a_b : vector of fold-by-fold differences between the two classifiers
#            (e.g. 100 values for 10 runs of 10-fold CV).
# rho      : correlation induced by overlapping training sets,
#            typically 1/(number of folds).
# rope_min, rope_max : bounds of the region of practical equivalence.
#
# Returns a list with 'left', 'rope', 'right': the posterior probabilities
# that the mean difference is below rope_min, inside the ROPE, or above
# rope_max; they sum to 1.
#
# Change vs. original: removed the unused local `stdX` (the corrected scale
# `sp` recomputes sd(diff_a_b) directly).
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }

  delta <- mean(diff_a_b)  # posterior mean of the difference
  n <- length(diff_a_b)
  df <- n - 1              # Student-t degrees of freedom
  # Correlation-corrected scale (Nadeau & Bengio variance correction).
  sp <- sd(diff_a_b) * sqrt(1 / n + rho / (1 - rho))
  p.left <- pt((rope_min - delta) / sp, df)
  p.rope <- pt((rope_max - delta) / sp, df) - p.left
  list("left" = p.left, "rope" = p.rope, "right" = 1 - p.left - p.rope)
}
# Fix the RNG seed so the Monte-Carlo Bayesian tests and any sampling below
# are reproducible.
set.seed(16974)
### Prepare datasets for one-hot encoding (where necessary) and for the
### persistent homology analysis of each dataset.

## One-hot encoding for the adult dataset
library(caret)

# Define the one-hot encoding scheme over every column of `adult`
# (assumes `adult` was loaded earlier in the document).
dummy.adult <- dummyVars(" ~ .", data=adult)

# Apply the scheme to produce a fully numeric data frame
# (110 columns: one per level of each categorical variable).
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata=adult))

# Inspect the structure of the encoded data frame; str() prints as a side
# effect, so no head() wrapper is needed (it only printed a spurious NULL).
str(adult.one_hot_df)
## 'data.frame':    32561 obs. of  110 variables:
##  $ V1                            : num  39 50 38 53 28 37 49 52 31 42 ...
##  $ V2..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  0 0 1 1 1 1 1 0 1 1 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 1 0 0 0 0 0 1 0 0 ...
##  $ V2.State.gov                  : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  77516 83311 215646 234721 338409 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  1 1 0 0 1 0 0 0 0 1 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 1 0 0 0 0 1 0 0 ...
##  $ V4.Masters                    : num  0 0 0 0 0 1 0 0 1 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V5                            : num  13 13 9 7 13 14 5 9 14 13 ...
##  $ V6.Divorced                   : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  0 1 0 1 1 1 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V6.Never.married              : num  1 0 0 0 0 0 0 0 1 0 ...
##  $ V6.Separated                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Adm.clerical               : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 1 0 0 0 1 0 1 0 1 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  0 1 0 1 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  1 0 1 0 0 0 1 0 1 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Unmarried                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 1 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 1 1 0 1 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 0 0 1 0 1 1 1 ...
##  $ V10.Female                    : num  0 0 0 0 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 1 1 1 0 0 0 1 0 1 ...
##  $ V11                           : num  2174 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 13 40 40 40 40 16 45 50 40 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
#append the raw class label (column 15 of adult) to the one-hot encoded frame
adult_df1<-adult[,15]
adult.one_hot_df1<-cbind(adult.one_hot_df,adult_df1)
#keep a reduced set of columns; NOTE(review): indices c(1,11,28,64,65,66) are
#positional into the encoded frame — confirm they select the intended
#features if the encoding ever changes
adult.one_hot_df2<-adult.one_hot_df1[,c(1,11,28,64,65,66)]

##Persistent homology of adult dataset

#create a random sample of adult.one_hot dataset to see if a barcode and persistent diagram can resolve from size of the dataset. 

adult.one_hot_1000_df <- adult.one_hot_df[sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE), ]
#str() returns NULL invisibly, so the original head(str(...)) only printed a
#spurious NULL; call str() directly.
str(adult.one_hot_1000_df)
## 'data.frame':    1000 obs. of  110 variables:
##  $ V1                            : num  33 25 39 21 32 26 20 58 24 63 ...
##  $ V2..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V2.Federal.gov                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Local.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Never.worked               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Private                    : num  1 1 1 1 1 1 0 0 1 0 ...
##  $ V2.Self.emp.inc               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Self.emp.not.inc           : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ V2.State.gov                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V2.Without.pay                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V3                            : num  176992 105693 234901 198050 134886 ...
##  $ V4.10th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.11th                       : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ V4.12th                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.1st.4th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.5th.6th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.7th.8th                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.9th                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Assoc.acdm                 : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ V4.Assoc.voc                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Bachelors                  : num  0 1 0 0 0 1 0 0 0 0 ...
##  $ V4.Doctorate                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.HS.grad                    : num  0 0 0 0 1 0 0 0 1 0 ...
##  $ V4.Masters                    : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Preschool                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Prof.school                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V4.Some.college               : num  0 0 0 0 0 0 1 0 0 1 ...
##  $ V5                            : num  14 13 12 12 9 13 10 7 9 10 ...
##  $ V6.Divorced                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.AF.spouse          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Married.civ.spouse         : num  1 0 0 0 1 0 0 1 0 1 ...
##  $ V6.Married.spouse.absent      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V6.Never.married              : num  0 1 0 1 0 1 1 0 1 0 ...
##  $ V6.Separated                  : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ V6.Widowed                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7..                          : num  0 0 0 0 0 0 1 1 0 0 ...
##  $ V7.Adm.clerical               : num  0 0 1 1 1 0 0 0 0 0 ...
##  $ V7.Armed.Forces               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Craft.repair               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Exec.managerial            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Farming.fishing            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Handlers.cleaners          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Machine.op.inspct          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Other.service              : num  0 0 0 0 0 0 0 0 1 1 ...
##  $ V7.Priv.house.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Prof.specialty             : num  1 1 0 0 0 1 0 0 0 0 ...
##  $ V7.Protective.serv            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Sales                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Tech.support               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V7.Transport.moving           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Husband                    : num  1 0 0 0 0 0 0 1 0 1 ...
##  $ V8.Not.in.family              : num  0 1 0 1 0 1 0 0 0 0 ...
##  $ V8.Other.relative             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V8.Own.child                  : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ V8.Unmarried                  : num  0 0 1 0 0 0 1 0 0 0 ...
##  $ V8.Wife                       : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ V9.Amer.Indian.Eskimo         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Asian.Pac.Islander         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Black                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.Other                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V9.White                      : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V10.Female                    : num  0 1 0 1 1 1 1 0 1 0 ...
##  $ V10.Male                      : num  1 0 1 0 0 0 0 1 0 1 ...
##  $ V11                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V12                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V13                           : num  40 40 40 25 40 40 20 16 25 48 ...
##  $ V14..                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cambodia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Canada                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.China                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Columbia                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Cuba                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Dominican.Republic        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ecuador                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.El.Salvador               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.England                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.France                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Germany                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Greece                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Guatemala                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Haiti                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Holand.Netherlands        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Honduras                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hong                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Hungary                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.India                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Iran                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Ireland                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Italy                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Jamaica                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Japan                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Laos                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Mexico                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Nicaragua                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Outlying.US.Guam.USVI.etc.: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Peru                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Philippines               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Poland                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ V14.Portugal                  : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
# calculate persistent homology for adult.one_hot_1000_df
# NOTE(review): calculate_homology/plot_barcode/plot_persist look like
# TDAstats functions, but TDAstats is not among the library() calls visible
# in this file's header — confirm it is attached elsewhere before this runs.
phom.adult.one_hot_1000_df <- calculate_homology(adult.one_hot_1000_df)

# plot barcode for adult.one_hot_1000_df
plot_barcode(phom.adult.one_hot_1000_df)

# plot persistent diagram of adult.one_hot_1000_df dataset
plot_persist(phom.adult.one_hot_1000_df)

##One hot encoding for DryBean Dataset dataset
library(caret)

#define one-hot encoding function
dummy_drybean <- dummyVars(" ~ .", data=Dry_Bean_Dataset)

#perform one-hot encoding on data frame
dry_bean_dataset_one_hot_df <- data.frame(predict(dummy_drybean, newdata=Dry_Bean_Dataset))

#draw a 1000-row random sample without replacement so persistent homology
#stays computationally tractable
dry_bean_dataset_one_hot_1000_df <- dry_bean_dataset_one_hot_df[sample(nrow(dry_bean_dataset_one_hot_df), size = 1000, replace = FALSE), ]
#str() returns NULL invisibly, so the original head(str(...)) only printed a
#spurious NULL; call str() directly.
str(dry_bean_dataset_one_hot_1000_df)
## 'data.frame':    1000 obs. of  23 variables:
##  $ Area           : num  39621 38687 36878 29154 69852 ...
##  $ Perimeter      : num  734 722 723 631 1095 ...
##  $ MajorAxisLength: num  256 266 264 234 378 ...
##  $ MinorAxisLength: num  197 185 178 159 236 ...
##  $ AspectRation   : num  1.3 1.44 1.48 1.47 1.6 ...
##  $ Eccentricity   : num  0.64 0.717 0.738 0.731 0.78 ...
##  $ ConvexArea     : num  40021 39030 37329 29499 70620 ...
##  $ EquivDiameter  : num  225 222 217 193 298 ...
##  $ Extent         : num  0.784 0.804 0.759 0.747 0.769 ...
##  $ Solidity       : num  0.99 0.991 0.988 0.988 0.989 ...
##  $ roundness      : num  0.925 0.932 0.887 0.92 0.732 ...
##  $ Compactness    : num  0.876 0.834 0.821 0.825 0.789 ...
##  $ ShapeFactor1   : num  0.00647 0.00688 0.00716 0.00801 0.00541 ...
##  $ ShapeFactor2   : num  0.00235 0.00205 0.002 0.00229 0.00129 ...
##  $ ShapeFactor3   : num  0.767 0.696 0.674 0.681 0.622 ...
##  $ ShapeFactor4   : num  0.999 0.998 0.998 0.998 0.995 ...
##  $ ClassBARBUNYA  : num  0 0 0 0 1 1 0 0 0 0 ...
##  $ ClassBOMBAY    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ ClassCALI      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ ClassDERMASON  : num  0 0 1 1 0 0 0 0 0 0 ...
##  $ ClassHOROZ     : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ ClassSEKER     : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ ClassSIRA      : num  0 1 0 0 0 0 0 0 1 1 ...
## NULL
##Persistent Homology of DryBean dataset

# calculate persistent homology for DryBean Dataset
# (computed on the 1000-row one-hot sample created above)
phom_drybean_df <- calculate_homology(dry_bean_dataset_one_hot_1000_df)


# plot barcode for DryBean Dataset
plot_barcode(phom_drybean_df)

# plot persistent diagram of DryBean Dataset
plot_persist(phom_drybean_df)

##Persistent Homology of Taiwanese Bankruptcy dataset

# `data` is presumably the Taiwanese bankruptcy data frame loaded earlier in
# the file; NOTE(review): this name shadows base::data() — confirm it was
# assigned upstream before this line runs.
taiwanese_data<-data

# calculate persistent homology for Taiwanese Bankruptcy Dataset
# (computed on the full dataset, unlike the sampled datasets above)
phom_taiwanese_data_df <- calculate_homology(taiwanese_data)

# plot barcode for Taiwanese Bankruptcy Dataset
plot_barcode(phom_taiwanese_data_df)

# plot persistent diagram of Taiwanese Bankruptcy Dataset
plot_persist(phom_taiwanese_data_df)

##One hot encoding for HTRU_2 dataset
library(caret)

#define one-hot encoding function
dummy_HTRU_2<- dummyVars(" ~ .", data=HTRU_2)

#perform one-hot encoding on data frame
HTRU_2.one_hot_df <- data.frame(predict(dummy_HTRU_2, newdata=HTRU_2))

#str final data frame
#str() returns NULL invisibly, so the original head(str(...)) only printed a
#spurious NULL; call str() directly.
str(HTRU_2.one_hot_df)
## 'data.frame':    17898 obs. of  9 variables:
##  $ V1: num  140.6 102.5 103 136.8 88.7 ...
##  $ V2: num  55.7 58.9 39.3 57.2 40.7 ...
##  $ V3: num  -0.2346 0.4653 0.3233 -0.0684 0.6009 ...
##  $ V4: num  -0.7 -0.515 1.051 -0.636 1.123 ...
##  $ V5: num  3.2 1.68 3.12 3.64 1.18 ...
##  $ V6: num  19.1 14.9 21.7 21 11.5 ...
##  $ V7: num  7.98 10.58 7.74 6.9 14.27 ...
##  $ V8: num  74.2 127.4 63.2 53.6 252.6 ...
##  $ V9: num  0 0 0 0 0 0 0 0 0 0 ...
## NULL
##Persistent Homology of HTRU_2 dataset

# calculate persistent homology for HTRU_2 Dataset
# NOTE(review): this uses the raw HTRU_2 data, not the HTRU_2.one_hot_df
# built just above — the rendered str() output shows HTRU_2 is all-numeric,
# so the encoding may be a no-op, but confirm this is intentional.
phom_HTRU_2_data_df <- calculate_homology(HTRU_2)

# plot barcode for HTRU_2 Dataset
plot_barcode(phom_HTRU_2_data_df)

# plot persistent diagram of HTRU_2 Dataset
plot_persist(phom_HTRU_2_data_df)

##One hot encoding for In.vehicle.coupon.recommendation dataset
library(caret)

#define one-hot encoding function
dummy_in.vehicle.coupon.recommendation<- dummyVars(" ~ .", data=in.vehicle.coupon.recommendation)

#perform one-hot encoding on data frame
in.vehicle.coupon.recommendation_one_hot_df <- data.frame(predict(dummy_in.vehicle.coupon.recommendation, newdata=in.vehicle.coupon.recommendation))


#str final data frame
#str() returns NULL invisibly, so the original head(str(...)) only printed a
#spurious NULL; call str() directly.
str(in.vehicle.coupon.recommendation_one_hot_df)
## 'data.frame':    12684 obs. of  121 variables:
##  $ destinationHome                                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ destinationNo.Urgent.Place                         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ destinationWork                                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ passangerAlone                                     : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ passangerFriend.s.                                 : num  0 1 1 1 1 1 1 0 0 0 ...
##  $ passangerKid.s.                                    : num  0 0 0 0 0 0 0 1 1 1 ...
##  $ passangerPartner                                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weatherRainy                                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weatherSnowy                                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ weatherSunny                                       : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ temperature                                        : num  55 80 80 80 80 80 55 80 80 80 ...
##  $ time10AM                                           : num  0 1 1 0 0 0 0 1 1 1 ...
##  $ time10PM                                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ time2PM                                            : num  1 0 0 1 1 0 1 0 0 0 ...
##  $ time6PM                                            : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ time7AM                                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ couponBar                                          : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ couponCarry.out...Take.away                        : num  0 0 1 0 0 0 1 0 1 0 ...
##  $ couponCoffee.House                                 : num  0 1 0 1 1 0 0 0 0 0 ...
##  $ couponRestaurant..20.                              : num  1 0 0 0 0 1 0 1 0 0 ...
##  $ couponRestaurant.20.50.                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expiration1d                                       : num  1 0 0 0 1 0 1 0 0 1 ...
##  $ expiration2h                                       : num  0 1 1 1 0 1 0 1 1 0 ...
##  $ genderFemale                                       : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ genderMale                                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age21                                              : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ age26                                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age31                                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age36                                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age41                                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age46                                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age50plus                                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ agebelow21                                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ maritalStatusDivorced                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ maritalStatusMarried.partner                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ maritalStatusSingle                                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ maritalStatusUnmarried.partner                     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ maritalStatusWidowed                               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ has_children                                       : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ educationAssociates.degree                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ educationBachelors.degree                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ educationGraduate.degree..Masters.or.Doctorate.    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ educationHigh.School.Graduate                      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ educationSome.college...no.degree                  : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ educationSome.High.School                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationArchitecture...Engineering               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationArts.Design.Entertainment.Sports...Media : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationBuilding...Grounds.Cleaning...Maintenance: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationBusiness...Financial                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationCommunity...Social.Services              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationComputer...Mathematical                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationConstruction...Extraction                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationEducation.Training.Library               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationFarming.Fishing...Forestry               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationFood.Preparation...Serving.Related       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationHealthcare.Practitioners...Technical     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationHealthcare.Support                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationInstallation.Maintenance...Repair        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationLegal                                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationLife.Physical.Social.Science             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationManagement                               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationOffice...Administrative.Support          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationPersonal.Care...Service                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationProduction.Occupations                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationProtective.Service                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationRetired                                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationSales...Related                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationStudent                                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationTransportation...Material.Moving         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationUnemployed                               : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ income.100000.or.More                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ income.12500....24999                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ income.25000....37499                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ income.37500....49999                              : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ income.50000....62499                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ income.62500....74999                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ income.75000....87499                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ income.87500....99999                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ incomeLess.than..12500                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ car                                                : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ carCar.that.is.too.old.to.install.Onstar..D        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carcrossover                                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cardo.not.drive                                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carMazda5                                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carScooter.and.motorcycle                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bar                                                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bar1.3                                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bar4.8                                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bargt8                                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Barless1                                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Barnever                                           : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ CoffeeHouse                                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CoffeeHouse1.3                                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CoffeeHouse4.8                                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CoffeeHousegt8                                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CoffeeHouseless1                                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CoffeeHousenever                                   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ CarryAway                                          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ CarryAway1.3                                       : num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
#draw a 100-row random sample without replacement to keep persistent homology tractable
in.vehicle.coupon.recommendation_one_hot_100_df <- in.vehicle.coupon.recommendation_one_hot_df[sample(nrow(in.vehicle.coupon.recommendation_one_hot_df), size = 100, replace = FALSE), ]

#str() returns NULL invisibly, so the original head(str(...)) only printed a
#spurious NULL; call str() directly.
str(in.vehicle.coupon.recommendation_one_hot_100_df)
## 'data.frame':    100 obs. of  121 variables:
##  $ destinationHome                                    : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ destinationNo.Urgent.Place                         : num  1 1 0 1 1 0 1 0 1 1 ...
##  $ destinationWork                                    : num  0 0 1 0 0 0 0 1 0 0 ...
##  $ passangerAlone                                     : num  1 0 1 0 0 1 0 1 1 1 ...
##  $ passangerFriend.s.                                 : num  0 1 0 1 1 0 0 0 0 0 ...
##  $ passangerKid.s.                                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ passangerPartner                                   : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ weatherRainy                                       : num  0 0 1 0 0 0 0 0 0 0 ...
##  $ weatherSnowy                                       : num  0 0 0 0 0 1 0 1 1 0 ...
##  $ weatherSunny                                       : num  1 1 0 1 1 0 1 0 0 1 ...
##  $ temperature                                        : num  80 80 55 80 80 30 80 30 30 80 ...
##  $ time10AM                                           : num  1 1 0 1 1 0 1 0 0 0 ...
##  $ time10PM                                           : num  0 0 0 0 0 1 0 0 0 1 ...
##  $ time2PM                                            : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ time6PM                                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ time7AM                                            : num  0 0 1 0 0 0 0 1 0 0 ...
##  $ couponBar                                          : num  0 0 0 1 0 0 0 0 1 0 ...
##  $ couponCarry.out...Take.away                        : num  0 0 1 0 1 0 0 0 0 0 ...
##  $ couponCoffee.House                                 : num  1 1 0 0 0 1 1 0 0 0 ...
##  $ couponRestaurant..20.                              : num  0 0 0 0 0 0 0 1 0 1 ...
##  $ couponRestaurant.20.50.                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ expiration1d                                       : num  1 0 1 1 0 0 0 0 1 1 ...
##  $ expiration2h                                       : num  0 1 0 0 1 1 1 1 0 0 ...
##  $ genderFemale                                       : num  0 0 0 0 0 1 0 0 0 1 ...
##  $ genderMale                                         : num  1 1 1 1 1 0 1 1 1 0 ...
##  $ age21                                              : num  1 0 1 0 1 0 0 0 0 0 ...
##  $ age26                                              : num  0 0 0 0 0 1 1 0 0 0 ...
##  $ age31                                              : num  0 1 0 0 0 0 0 1 1 0 ...
##  $ age36                                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age41                                              : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ age46                                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ age50plus                                          : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ agebelow21                                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ maritalStatusDivorced                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ maritalStatusMarried.partner                       : num  0 0 0 0 0 0 0 0 1 1 ...
##  $ maritalStatusSingle                                : num  1 1 0 1 1 1 0 1 0 0 ...
##  $ maritalStatusUnmarried.partner                     : num  0 0 1 0 0 0 1 0 0 0 ...
##  $ maritalStatusWidowed                               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ has_children                                       : num  0 1 0 0 0 0 0 0 1 1 ...
##  $ educationAssociates.degree                         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ educationBachelors.degree                          : num  1 0 0 1 0 1 0 0 1 0 ...
##  $ educationGraduate.degree..Masters.or.Doctorate.    : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ educationHigh.School.Graduate                      : num  0 1 1 0 0 0 0 0 0 0 ...
##  $ educationSome.college...no.degree                  : num  0 0 0 0 1 0 1 1 0 0 ...
##  $ educationSome.High.School                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationArchitecture...Engineering               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationArts.Design.Entertainment.Sports...Media : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationBuilding...Grounds.Cleaning...Maintenance: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationBusiness...Financial                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationCommunity...Social.Services              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationComputer...Mathematical                  : num  0 0 0 1 0 0 0 0 1 0 ...
##  $ occupationConstruction...Extraction                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationEducation.Training.Library               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationFarming.Fishing...Forestry               : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationFood.Preparation...Serving.Related       : num  0 0 1 0 0 0 0 0 0 1 ...
##  $ occupationHealthcare.Practitioners...Technical     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationHealthcare.Support                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationInstallation.Maintenance...Repair        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationLegal                                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationLife.Physical.Social.Science             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationManagement                               : num  1 1 0 0 0 0 0 0 0 0 ...
##  $ occupationOffice...Administrative.Support          : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ occupationPersonal.Care...Service                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationProduction.Occupations                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationProtective.Service                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationRetired                                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationSales...Related                          : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ occupationStudent                                  : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ occupationTransportation...Material.Moving         : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ occupationUnemployed                               : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ income.100000.or.More                              : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ income.12500....24999                              : num  0 0 1 0 1 1 0 1 0 0 ...
##  $ income.25000....37499                              : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ income.37500....49999                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ income.50000....62499                              : num  0 1 0 0 0 0 1 0 0 0 ...
##  $ income.62500....74999                              : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ income.75000....87499                              : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ income.87500....99999                              : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ incomeLess.than..12500                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ car                                                : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ carCar.that.is.too.old.to.install.Onstar..D        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carcrossover                                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cardo.not.drive                                    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carMazda5                                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ carScooter.and.motorcycle                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bar                                                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bar1.3                                             : num  0 0 1 1 0 0 1 1 0 0 ...
##  $ Bar4.8                                             : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ Bargt8                                             : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ Barless1                                           : num  0 0 0 0 1 1 0 0 0 1 ...
##  $ Barnever                                           : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ CoffeeHouse                                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CoffeeHouse1.3                                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CoffeeHouse4.8                                     : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ CoffeeHousegt8                                     : num  0 1 0 0 0 0 0 0 0 1 ...
##  $ CoffeeHouseless1                                   : num  0 0 0 0 1 1 0 1 1 0 ...
##  $ CoffeeHousenever                                   : num  1 0 1 1 0 0 0 0 0 0 ...
##  $ CarryAway                                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CarryAway1.3                                       : num  0 0 0 1 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
##Persistent Homology of in.vehicle.coupon.recommendation_dataset dataset

# calculate persistent homology for sample of 100 points in the in.vehicle.coupon.recommendation Dataset
# NOTE(review): calculate_homology/plot_barcode/plot_persist match the TDAstats
# API, but no library(TDAstats) call is visible in this chunk -- confirm the
# package is attached earlier in the document. Rows of the one-hot data frame
# are treated as points in Euclidean space, so every column must be numeric.
phom_in.vehicle.coupon.recommendation_one_hot_100_df_data_df <- calculate_homology(in.vehicle.coupon.recommendation_one_hot_100_df)

# plot barcode for sample 100 in.vehicle.coupon.recommendation Dataset
# (one bar per topological feature, spanning its birth-to-death scale)
plot_barcode(phom_in.vehicle.coupon.recommendation_one_hot_100_df_data_df)

# plot persistent diagram of in.vehicle.coupon.recommendation_one_hot_100_df Dataset
plot_persist(phom_in.vehicle.coupon.recommendation_one_hot_100_df_data_df)

##One hot encoding for RT_IOT2022 dataset
library(caret)

# Define the one-hot encoding transform over every column of RT_IOT2022
# (factor/character columns become indicator columns; numeric pass through).
dummy_RT_IOT2022 <- dummyVars(" ~ .", data = RT_IOT2022)

# Apply the transform to obtain a fully numeric data frame.
RT_IOT2022_one_hot_df <- data.frame(predict(dummy_RT_IOT2022, newdata = RT_IOT2022))

# Inspect the structure of the encoded data frame.
# FIX: str() prints as a side effect and returns NULL invisibly, so the
# original head(str(...)) wrapper only appended a spurious "NULL" line
# to the report. Call str() directly.
str(RT_IOT2022_one_hot_df)
## 'data.frame':    123117 obs. of  107 variables:
##  $ X                                    : num  0 1 2 3 4 5 6 7 8 9 ...
##  $ id.orig_p                            : num  38667 51143 44761 60893 51087 ...
##  $ id.resp_p                            : num  1883 1883 1883 1883 1883 ...
##  $ protoicmp                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ prototcp                             : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ protoudp                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ service.                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicedhcp                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicedns                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicehttp                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ serviceirc                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicemqtt                          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ servicentp                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ serviceradius                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicessh                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicessl                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_duration                        : num  32 31.9 32.1 32 31.9 ...
##  $ fwd_pkts_tot                         : num  9 9 9 9 9 9 9 9 9 9 ...
##  $ bwd_pkts_tot                         : num  5 5 5 5 5 5 5 5 5 5 ...
##  $ fwd_data_pkts_tot                    : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ bwd_data_pkts_tot                    : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ fwd_pkts_per_sec                     : num  0.281 0.282 0.28 0.282 0.282 ...
##  $ bwd_pkts_per_sec                     : num  0.156 0.157 0.156 0.156 0.157 ...
##  $ flow_pkts_per_sec                    : num  0.437 0.439 0.436 0.438 0.439 ...
##  $ down_up_ratio                        : num  0.556 0.556 0.556 0.556 0.556 ...
##  $ fwd_header_size_tot                  : num  296 296 296 296 296 296 296 296 296 296 ...
##  $ fwd_header_size_min                  : num  32 32 32 32 32 32 32 32 32 32 ...
##  $ fwd_header_size_max                  : num  40 40 40 40 40 40 40 40 40 40 ...
##  $ bwd_header_size_tot                  : num  168 168 168 168 168 168 168 168 168 168 ...
##  $ bwd_header_size_min                  : num  32 32 32 32 32 32 32 32 32 32 ...
##  $ bwd_header_size_max                  : num  40 40 40 40 40 40 40 40 40 40 ...
##  $ flow_FIN_flag_count                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_SYN_flag_count                  : num  2 2 2 2 2 2 2 2 2 2 ...
##  $ flow_RST_flag_count                  : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ fwd_PSH_flag_count                   : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ bwd_PSH_flag_count                   : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ flow_ACK_flag_count                  : num  13 13 13 13 13 13 13 13 13 13 ...
##  $ fwd_URG_flag_count                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_URG_flag_count                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_CWR_flag_count                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_ECE_flag_count                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_pkts_payload.min                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_pkts_payload.max                 : num  33 33 33 33 33 33 33 33 33 33 ...
##  $ fwd_pkts_payload.tot                 : num  76 76 74 74 76 76 76 76 76 76 ...
##  $ fwd_pkts_payload.avg                 : num  8.44 8.44 8.22 8.22 8.44 ...
##  $ fwd_pkts_payload.std                 : num  13.1 13.1 12.9 12.9 13.1 ...
##  $ bwd_pkts_payload.min                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_pkts_payload.max                 : num  23 23 21 21 23 23 23 23 23 23 ...
##  $ bwd_pkts_payload.tot                 : num  32 32 30 30 32 32 32 32 32 32 ...
##  $ bwd_pkts_payload.avg                 : num  6.4 6.4 6 6 6.4 6.4 6.4 6.4 6.4 6.4 ...
##  $ bwd_pkts_payload.std                 : num  9.56 9.56 8.69 8.69 9.56 ...
##  $ flow_pkts_payload.min                : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_pkts_payload.max                : num  33 33 33 33 33 33 33 33 33 33 ...
##  $ flow_pkts_payload.tot                : num  108 108 104 104 108 108 108 108 108 108 ...
##  $ flow_pkts_payload.avg                : num  7.71 7.71 7.43 7.43 7.71 ...
##  $ flow_pkts_payload.std                : num  11.6 11.6 11.2 11.2 11.6 ...
##  $ fwd_iat.min                          : num  762 247 284 289 388 ...
##  $ fwd_iat.max                          : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ fwd_iat.tot                          : num  32011598 31883584 32124053 31961063 31902362 ...
##  $ fwd_iat.avg                          : num  4001450 3985448 4015507 3995133 3987795 ...
##  $ fwd_iat.std                          : num  10403074 10463456 10442378 10482528 10447019 ...
##  $ bwd_iat.min                          : num  4439 4214 2457 3934 3005 ...
##  $ bwd_iat.max                          : num  1511694 1576436 1476049 1551892 1632083 ...
##  $ bwd_iat.tot                          : num  2026391 1876261 2013770 1883784 1935984 ...
##  $ bwd_iat.avg                          : num  506598 469065 503442 470946 483996 ...
##  $ bwd_iat.std                          : num  680406 741352 660344 724569 768543 ...
##  $ flow_iat.min                         : num  762 247 284 289 388 ...
##  $ flow_iat.max                         : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ flow_iat.tot                         : num  32011598 31883584 32124053 31961063 31902362 ...
##  $ flow_iat.avg                         : num  2462431 2452583 2471081 2458543 2454028 ...
##  $ flow_iat.std                         : num  8199747 8242459 8230593 8257786 8230584 ...
##  $ payload_bytes_per_second             : num  3.37 3.39 3.24 3.25 3.39 ...
##  $ fwd_subflow_pkts                     : num  3 3 3 3 3 3 3 3 3 3 ...
##  $ bwd_subflow_pkts                     : num  1.67 1.67 1.67 1.67 1.67 ...
##  $ fwd_subflow_bytes                    : num  25.3 25.3 24.7 24.7 25.3 ...
##  $ bwd_subflow_bytes                    : num  10.7 10.7 10 10 10.7 ...
##  $ fwd_bulk_bytes                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_bytes                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_bulk_packets                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_packets                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_bulk_rate                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_rate                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ active.min                           : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.max                           : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.tot                           : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.avg                           : num  2282415 2028307 2281904 2047288 2087657 ...
##  $ active.std                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ idle.min                             : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.max                             : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.tot                             : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.avg                             : num  29729183 29855277 29842149 29913775 29814705 ...
##  $ idle.std                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_init_window_size                 : num  64240 64240 64240 64240 64240 ...
##  $ bwd_init_window_size                 : num  26847 26847 26847 26847 26847 ...
##  $ fwd_last_window_size                 : num  502 502 502 502 502 502 502 502 502 502 ...
##  $ Attack_typeARP_poisioning            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Attack_typeDDOS_Slowloris            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Attack_typeDOS_SYN_Hping             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Attack_typeMetasploit_Brute_Force_SSH: num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
# Draw a random sample of 1000 rows (without replacement) for a tractable
# persistent-homology computation.
RT_IOT2022_one_hot_df_1000 <- RT_IOT2022_one_hot_df[sample(nrow(RT_IOT2022_one_hot_df), size = 1000, replace = FALSE), ]

# FIX: str() returns NULL invisibly, so head(str(...)) only printed a
# spurious "NULL"; call str() directly.
str(RT_IOT2022_one_hot_df_1000)
## 'data.frame':    1000 obs. of  107 variables:
##  $ X                                    : num  2139 70389 70221 71870 86340 ...
##  $ id.orig_p                            : num  33037 60889 60822 61663 1698 ...
##  $ id.resp_p                            : num  1883 21 21 21 21 ...
##  $ protoicmp                            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ prototcp                             : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ protoudp                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ service.                             : num  0 1 1 1 1 1 1 0 1 1 ...
##  $ servicedhcp                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicedns                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicehttp                          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ serviceirc                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicemqtt                          : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ servicentp                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ serviceradius                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicessh                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ servicessl                           : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ flow_duration                        : num  4.19e+01 0.00 0.00 1.00e-06 1.00e-06 ...
##  $ fwd_pkts_tot                         : num  10 1 1 1 1 1 1 14 1 1 ...
##  $ bwd_pkts_tot                         : num  6 0 0 1 1 1 0 14 1 1 ...
##  $ fwd_data_pkts_tot                    : num  3 1 1 1 1 1 1 5 0 1 ...
##  $ bwd_data_pkts_tot                    : num  4 0 0 0 0 0 0 6 0 0 ...
##  $ fwd_pkts_per_sec                     : num  2.39e-01 0.00 0.00 1.05e+06 8.39e+05 ...
##  $ bwd_pkts_per_sec                     : num  1.43e-01 0.00 0.00 1.05e+06 8.39e+05 ...
##  $ flow_pkts_per_sec                    : num  3.82e-01 0.00 0.00 2.10e+06 1.68e+06 ...
##  $ down_up_ratio                        : num  0.6 0 0 1 1 1 0 1 1 1 ...
##  $ fwd_header_size_tot                  : num  328 20 20 20 20 20 20 300 20 20 ...
##  $ fwd_header_size_min                  : num  32 20 20 20 20 20 20 20 20 20 ...
##  $ fwd_header_size_max                  : num  40 20 20 20 20 20 20 40 20 20 ...
##  $ bwd_header_size_tot                  : num  200 0 0 20 20 20 0 292 20 20 ...
##  $ bwd_header_size_min                  : num  32 0 0 20 20 20 0 20 20 20 ...
##  $ bwd_header_size_max                  : num  40 0 0 20 20 20 0 32 20 20 ...
##  $ flow_FIN_flag_count                  : num  0 0 0 0 0 0 0 2 1 0 ...
##  $ flow_SYN_flag_count                  : num  2 1 1 1 1 1 1 2 0 1 ...
##  $ flow_RST_flag_count                  : num  1 0 0 1 1 1 0 2 1 1 ...
##  $ fwd_PSH_flag_count                   : num  3 0 0 0 0 0 0 5 1 0 ...
##  $ bwd_PSH_flag_count                   : num  4 0 0 0 0 0 0 3 0 0 ...
##  $ flow_ACK_flag_count                  : num  15 0 0 1 1 1 0 25 1 1 ...
##  $ fwd_URG_flag_count                   : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ bwd_URG_flag_count                   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_CWR_flag_count                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ flow_ECE_flag_count                  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_pkts_payload.min                 : num  0 120 120 120 120 120 120 0 0 120 ...
##  $ fwd_pkts_payload.max                 : num  33 120 120 120 120 120 120 517 0 120 ...
##  $ fwd_pkts_payload.tot                 : num  77 120 120 120 120 ...
##  $ fwd_pkts_payload.avg                 : num  7.7 120 120 120 120 ...
##  $ fwd_pkts_payload.std                 : num  12.8 0 0 0 0 ...
##  $ bwd_pkts_payload.min                 : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_pkts_payload.max                 : num  22 0 0 0 0 ...
##  $ bwd_pkts_payload.tot                 : num  35 0 0 0 0 ...
##  $ bwd_pkts_payload.avg                 : num  5.83 0 0 0 0 ...
##  $ bwd_pkts_payload.std                 : num  8.21 0 0 0 0 ...
##  $ flow_pkts_payload.min                : num  0 120 120 0 0 0 120 0 0 0 ...
##  $ flow_pkts_payload.max                : num  33 120 120 120 120 ...
##  $ flow_pkts_payload.tot                : num  112 120 120 120 120 ...
##  $ flow_pkts_payload.avg                : num  7 120 120 60 60 ...
##  $ flow_pkts_payload.std                : num  11 0 0 84.9 84.9 ...
##  $ fwd_iat.min                          : num  241 0 0 0 0 ...
##  $ fwd_iat.max                          : num  39889886 0 0 0 0 ...
##  $ fwd_iat.tot                          : num  41910572 0 0 0 0 ...
##  $ fwd_iat.avg                          : num  4656730 0 0 0 0 ...
##  $ fwd_iat.std                          : num  13219944 0 0 0 0 ...
##  $ bwd_iat.min                          : num  77 0 0 0 0 ...
##  $ bwd_iat.max                          : num  1567814 0 0 0 0 ...
##  $ bwd_iat.tot                          : num  1871896 0 0 0 0 ...
##  $ bwd_iat.avg                          : num  374379 0 0 0 0 ...
##  $ bwd_iat.std                          : num  671237 0 0 0 0 ...
##  $ flow_iat.min                         : num  77.009 0 0 0.954 1.192 ...
##  $ flow_iat.max                         : num  3.99e+07 0.00 0.00 9.54e-01 1.19 ...
##  $ flow_iat.tot                         : num  4.19e+07 0.00 0.00 9.54e-01 1.19 ...
##  $ flow_iat.avg                         : num  2.79e+06 0.00 0.00 9.54e-01 1.19 ...
##  $ flow_iat.std                         : num  10268536 0 0 0 0 ...
##  $ payload_bytes_per_second             : num  2.67 0.00 0.00 1.26e+08 1.01e+08 ...
##  $ fwd_subflow_pkts                     : num  3.33 1 1 1 1 ...
##  $ bwd_subflow_pkts                     : num  2 0 0 1 1 1 0 14 1 1 ...
##  $ fwd_subflow_bytes                    : num  25.7 120 120 120 120 ...
##  $ bwd_subflow_bytes                    : num  11.7 0 0 0 0 ...
##  $ fwd_bulk_bytes                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_bytes                       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_bulk_packets                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_packets                     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_bulk_rate                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwd_bulk_rate                        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ active.min                           : num  2.02e+06 0.00 0.00 9.54e-01 1.19 ...
##  $ active.max                           : num  2.02e+06 0.00 0.00 9.54e-01 1.19 ...
##  $ active.tot                           : num  2.02e+06 0.00 0.00 9.54e-01 1.19 ...
##  $ active.avg                           : num  2.02e+06 0.00 0.00 9.54e-01 1.19 ...
##  $ active.std                           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ idle.min                             : num  39889886 0 0 0 0 ...
##  $ idle.max                             : num  39889886 0 0 0 0 ...
##  $ idle.tot                             : num  39889886 0 0 0 0 ...
##  $ idle.avg                             : num  39889886 0 0 0 0 ...
##  $ idle.std                             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fwd_init_window_size                 : num  64240 64 64 64 64 ...
##  $ bwd_init_window_size                 : num  26847 0 0 0 0 ...
##  $ fwd_last_window_size                 : num  502 64 64 64 64 ...
##  $ Attack_typeARP_poisioning            : num  0 0 0 0 0 0 0 1 0 0 ...
##  $ Attack_typeDDOS_Slowloris            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Attack_typeDOS_SYN_Hping             : num  0 1 1 1 1 1 1 0 0 1 ...
##  $ Attack_typeMetasploit_Brute_Force_SSH: num  0 0 0 0 0 0 0 0 0 0 ...
##   [list output truncated]
## NULL
##Persistent Homology of 1000 sampled points of the RT_IOT2022_one_hot_df dataset

# calculate persistent homology for 1000 sampled points of the RT_IOT2022 Dataset
# NOTE(review): many columns here have very large magnitudes (iat/idle fields in
# the tens of millions) next to 0/1 indicators, so distances are dominated by a
# few columns -- consider scaling before the filtration; confirm intent.
phom_RT_IOT2022_1000_data_df <- calculate_homology(RT_IOT2022_one_hot_df_1000)

# plot barcode for 1000 sampled points of the RT_IOT2022 Dataset
plot_barcode(phom_RT_IOT2022_1000_data_df)

# plot persistent diagram of of 1000 points of the RT_IOT2022 Dataset
plot_persist(phom_RT_IOT2022_1000_data_df)

##Persistent Homology of PP_URL_Culled Dataset
#Remove 5 useless variables from dataset preventing persistent homology.
# NOTE(review): columns 1, 2, 4, 7 and 30 are dropped by position; confirm
# these are the intended non-numeric columns of PhiUSIIL_Phishing_URL_Dataset.

PP_URL_Culled<-PhiUSIIL_Phishing_URL_Dataset[,-c(1,2,4,7,30)]

# Sample 1000 rows without replacement for a tractable homology computation.
PP_URL_Culled_1000 <- PP_URL_Culled[sample(nrow(PP_URL_Culled), size = 1000, replace = FALSE), ]

# FIX: str() prints as a side effect and returns NULL invisibly; the original
# head(str(...)) wrapper only added a spurious "NULL" line to the report.
str(PP_URL_Culled_1000)
## 'data.frame':    1000 obs. of  51 variables:
##  $ URLLength                 : int  35 32 41 62 20 25 30 27 35 37 ...
##  $ DomainLength              : int  26 25 24 34 13 18 23 20 28 29 ...
##  $ IsDomainIP                : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ URLSimilarityIndex        : num  69.5 100 39.9 24.3 100 ...
##  $ CharContinuationRate      : num  0.591 1 0.857 0.258 1 ...
##  $ TLDLegitimateProb         : num  0.52291 0.52291 0.00641 0.01293 0.00598 ...
##  $ URLCharProb               : num  0.0533 0.0631 0.0596 0.0589 0.0495 ...
##  $ TLDLength                 : int  3 3 2 2 2 3 2 2 3 3 ...
##  $ NoOfSubDomain             : int  1 1 1 1 1 1 2 1 1 1 ...
##  $ HasObfuscation            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfObfuscatedChar        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ObfuscationRatio          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfLettersInURL          : int  23 19 29 46 7 12 16 13 22 21 ...
##  $ LetterRatioInURL          : num  0.657 0.594 0.707 0.742 0.35 0.48 0.533 0.481 0.629 0.568 ...
##  $ NoOfDegitsInURL           : int  1 0 0 0 0 0 0 0 0 5 ...
##  $ DegitRatioInURL           : num  0.029 0 0 0 0 0 0 0 0 0.135 ...
##  $ NoOfEqualsInURL           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfQMarkInURL            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfAmpersandInURL        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfOtherSpecialCharsInURL: int  3 1 4 8 1 1 2 2 1 3 ...
##  $ SpacialCharRatioInURL     : num  0.086 0.031 0.098 0.129 0.05 0.04 0.067 0.074 0.029 0.081 ...
##  $ IsHTTPS                   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ LineOfCode                : int  228 1521 16 126 238 540 1164 236 2752 48 ...
##  $ LargestLineLength         : int  77763 10936 286 108 181 1350 16009 434 9381 710 ...
##  $ HasTitle                  : int  1 1 1 1 1 1 1 1 1 0 ...
##  $ DomainTitleMatchScore     : num  0 100 0 0 100 100 0 100 100 0 ...
##  $ URLTitleMatchScore        : num  0 100 0 0 100 100 0 100 100 0 ...
##  $ HasFavicon                : int  0 1 0 0 1 0 0 1 0 0 ...
##  $ Robots                    : int  0 1 0 0 0 0 1 1 1 0 ...
##  $ IsResponsive              : int  0 1 1 1 1 1 1 1 1 0 ...
##  $ NoOfURLRedirect           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfSelfRedirect          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ HasDescription            : int  0 1 0 0 1 0 0 0 1 0 ...
##  $ NoOfPopup                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ NoOfiFrame                : int  2 12 0 0 0 3 4 1 0 0 ...
##  $ HasExternalFormSubmit     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ HasSocialNet              : int  0 1 0 0 0 1 1 0 1 0 ...
##  $ HasSubmitButton           : int  0 1 0 1 0 0 1 0 0 0 ...
##  $ HasHiddenFields           : int  0 1 0 0 0 1 1 0 0 0 ...
##  $ HasPasswordField          : int  0 0 0 1 0 0 1 0 0 0 ...
##  $ Bank                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Pay                       : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ Crypto                    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ HasCopyrightInfo          : int  0 1 0 0 1 0 1 1 1 0 ...
##  $ NoOfImage                 : int  0 85 0 1 12 8 21 11 65 0 ...
##  $ NoOfCSS                   : int  0 47 0 1 3 11 9 10 3 0 ...
##  $ NoOfJS                    : int  1 57 0 0 3 11 31 6 5 0 ...
##  $ NoOfSelfRef               : int  0 193 0 0 4 19 40 0 70 0 ...
##  $ NoOfEmptyRef              : int  0 8 0 2 0 3 3 0 1 0 ...
##  $ NoOfExternalRef           : int  1 245 1 0 14 4 54 2 230 1 ...
##  $ label                     : int  0 1 0 0 1 1 1 1 1 0 ...
## NULL
##Persistent Homology of PP_URL_Culled dataset

# calculate persistent homology for PP_URL_Culled Dataset
# (operates on the 1000-row sample created above; all columns numeric)
phom_PP_URL_Culled_1000_data_df <- calculate_homology(PP_URL_Culled_1000)


# plot barcode for PP_URL_Culled Dataset
plot_barcode(phom_PP_URL_Culled_1000_data_df)

# plot persistent diagram of phom_PP_URL Dataset
plot_persist(phom_PP_URL_Culled_1000_data_df)

##One hot encoding for BitcoinHeistData.2.culled2 dataset

#Bitcoin data culled: drop the first column by position.
# NOTE(review): confirm column 1 is the non-numeric address identifier.
BitcoinHeistData.2.culled<-BitcoinHeistData.2[,-1]

# Collapse the label to its first five characters (yielding levels such as
# "montr", "padua", "princ", "white" per the one-hot output below) so the
# encoding produces few indicator columns.
# NOTE(review): str_sub is from stringr -- confirm library(stringr) is
# attached earlier in the document.
BitcoinHeistData.2.culled$label2<-str_sub(BitcoinHeistData.2.culled$label,1,5)

# Drop the original label column (position 9), keeping only label2.
BitcoinHeistData.2.culled2<-BitcoinHeistData.2.culled[,-9]

library(caret)

#define one-hot encoding function
dummy_BitcoinHeistData.2.culled2<- dummyVars(" ~ .", data=BitcoinHeistData.2.culled2)

#perform one-hot encoding on data frame
BitcoinHeistData.2.culled2_one_hot_df <- data.frame(predict(dummy_BitcoinHeistData.2.culled2, newdata=BitcoinHeistData.2.culled2))

# Inspect the structure of the encoded data frame.
# FIX: str() returns NULL invisibly, so head(str(...)) only printed a
# spurious "NULL"; call str() directly.
str(BitcoinHeistData.2.culled2_one_hot_df)
## 'data.frame':    2916697 obs. of  12 variables:
##  $ year       : num  2017 2016 2016 2016 2016 ...
##  $ day        : num  11 132 246 322 238 96 225 324 298 62 ...
##  $ length     : num  18 44 0 72 144 144 142 78 144 112 ...
##  $ weight     : num  0.008333 0.000244 1 0.003906 0.072848 ...
##  $ count      : num  1 1 1 1 456 ...
##  $ looped     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ neighbors  : num  2 1 2 2 1 1 2 2 2 1 ...
##  $ income     : num  1.00e+08 1.00e+08 2.00e+08 7.12e+07 2.00e+08 ...
##  $ label2montr: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ label2padua: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ label2princ: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ label2white: num  0 0 0 0 0 0 0 0 0 0 ...
## NULL
# Sample 1000 rows from the Bitcoin one-hot data frame, without replacement.
# BUG FIX: the original drew row indices from nrow(adult.one_hot_df) -- the
# row count of an unrelated dataset. If that count differs from this data
# frame's, the sample is either out of range (producing NA rows) or silently
# restricted/biased. Index with this data frame's own row count.
BitcoinHeistData.2.culled2_one_hot_1000_df <- BitcoinHeistData.2.culled2_one_hot_df[sample(nrow(BitcoinHeistData.2.culled2_one_hot_df), size = 1000, replace = FALSE), ]

# FIX: str() returns NULL invisibly, so head(str(...)) only printed a
# spurious "NULL"; call str() directly.
str(BitcoinHeistData.2.culled2_one_hot_1000_df)
## 'data.frame':    1000 obs. of  12 variables:
##  $ year       : num  2014 2013 2015 2014 2016 ...
##  $ day        : num  120 263 202 80 155 85 210 331 166 273 ...
##  $ length     : num  144 144 16 144 12 0 0 2 18 4 ...
##  $ weight     : num  0.000448 1.927315 0.024306 1.056854 0.0625 ...
##  $ count      : num  1098 1796 6 1609 1 ...
##  $ looped     : num  0 46 0 2 0 0 0 0 0 0 ...
##  $ neighbors  : num  1 4 1 2 2 2 2 2 2 2 ...
##  $ income     : num  2.38e+08 2.05e+09 2.64e+08 2.52e+08 1.20e+08 ...
##  $ label2montr: num  0 1 0 0 1 0 0 0 1 1 ...
##  $ label2padua: num  1 0 1 1 0 0 0 0 0 0 ...
##  $ label2princ: num  0 0 0 0 0 1 1 1 0 0 ...
##  $ label2white: num  0 0 0 0 0 0 0 0 0 0 ...
## NULL
##Persistent Homology of BitcoinHeistData.2.culled2 dataset
## (header previously mislabelled as "PP_URL_Culled")

# calculate persistent homology for BitcoinHeistData.2.culled Dataset
# NOTE(review): the income column is on the order of 1e8 while the label
# indicators are 0/1, so distances are dominated by income -- consider
# scaling before the filtration; confirm intent.
phom_BitcoinHeistData.2.culled2_one_hot_1000_df <- calculate_homology(BitcoinHeistData.2.culled2_one_hot_1000_df)


# plot barcode for BitcoinHeistData.2.culled2_one_hot_1000_df Dataset
plot_barcode(phom_BitcoinHeistData.2.culled2_one_hot_1000_df)

# plot persistent diagram of BitcoinHeistData.2.culled2_one_hot_1000_df Dataset
plot_persist(phom_BitcoinHeistData.2.culled2_one_hot_1000_df)

#Diabetic data culled
# Drop columns 1, 2, 13 and 14 by position.
# NOTE(review): confirm these are the identifier/high-cardinality columns of
# diabetic_data that were blocking the one-hot encoding.

diabetic_data_culled<-diabetic_data[,-c(1,2,13,14)]

##One hot encoding for diabetic_data_culled dataset
library(caret)

#define one-hot encoding function
dummy_diabetic_data_culled<- dummyVars(" ~ .", data=diabetic_data_culled)

#perform one-hot encoding on data frame
# (disabled in this render; the pipeline below is kept for re-enabling)
#diabetic_data_culled_one_hot_df <- data.frame(predict(dummy_diabetic_data_culled, newdata=diabetic_data_culled))


##str final data frame
#str(diabetic_data_culled_one_hot_df)

# NOTE(review): the original commented-out sampling line indexed
# adult.one_hot_df (the wrong data frame) and had a misplaced closing
# parenthesis inside nrow(); corrected here so it works when re-enabled:
#diabetic_data_culled_one_hot_1000_df <- diabetic_data_culled_one_hot_df[sample(nrow(diabetic_data_culled_one_hot_df), size = 1000, replace = FALSE), ]

##Persistent Homology of Diabetic dataset

# calculate persistent homology for diabetic_data_culled Dataset
#phom_diabetic_data_culled_one_hot_1000_df <- calculate_homology(diabetic_data_culled_one_hot_1000_df)


# plot barcode for diabetic_data_culled_one_hot_1000 Dataset
#plot_barcode(phom_diabetic_data_culled_one_hot_1000_df)

# plot persistent diagram of diabetic_data_culled_one_hot_1000 Dataset
#plot_persist(phom_diabetic_data_culled_one_hot_1000_df)


#Poker Hand Dataset
# Sample 1000 rows without replacement from the poker-hand training data
# (already fully numeric, so no one-hot step is needed).
poker_hand_training_true_1000 <- poker.hand.training.true[sample(nrow(poker.hand.training.true), size = 1000, replace = FALSE), ]
 
##Persistent Homology of Poker Hand Training dataset

# calculate persistent homology for poker_hand_training_true_1000 Dataset
phom_poker_hand_training_true_1000 <- calculate_homology(poker_hand_training_true_1000)


# plot barcode for poker_hand_training_true_1000 Dataset
plot_barcode(phom_poker_hand_training_true_1000)

# plot persistent diagram of poker_hand_training_true_1000 Dataset
plot_persist(phom_poker_hand_training_true_1000)

#Internet Firewall Dataset
# NOTE(review): `log2` here must be a data frame loaded earlier (e.g. from a
# "log2.csv" firewall log); the name shadows base::log2 -- confirm it is the
# intended object before this chunk runs.
IntFirewallData<-log2

##One hot encoding for the Internet Firewall (IntFirewallData) dataset
## (header previously mislabelled as "BitcoinHeistData.2.culled2")
library(caret)

#define one-hot encoding function
dummy_IntFirewallData <- dummyVars(" ~ .", data= IntFirewallData)

#perform one-hot encoding on data frame
IntFirewallData_one_hot_df <- data.frame(predict(dummy_IntFirewallData, newdata= IntFirewallData))

# Inspect the structure of the encoded data frame.
# FIX: str() returns NULL invisibly, so head(str(...)) only printed a
# spurious "NULL"; call str() directly.
str(IntFirewallData_one_hot_df)
## 'data.frame':    65532 obs. of  15 variables:
##  $ Source.Port         : num  57222 56258 6881 50553 50002 ...
##  $ Destination.Port    : num  53 3389 50321 3389 443 ...
##  $ NAT.Source.Port     : num  54587 56258 43265 50553 45848 ...
##  $ NAT.Destination.Port: num  53 3389 50321 3389 443 ...
##  $ Actionallow         : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Actiondeny          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Actiondrop          : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Actionreset.both    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bytes               : num  177 4768 238 3327 25358 ...
##  $ Bytes.Sent          : num  94 1600 118 1438 6778 ...
##  $ Bytes.Received      : num  83 3168 120 1889 18580 ...
##  $ Packets             : num  2 19 2 15 31 21 6 23 1 31 ...
##  $ Elapsed.Time..sec.  : num  30 17 1199 17 16 ...
##  $ pkts_sent           : num  1 10 1 8 13 12 3 12 1 15 ...
##  $ pkts_received       : num  1 9 1 7 18 9 3 11 0 16 ...
## NULL
# Draw 1000 distinct row indices at random from the one-hot firewall frame,
# then subset to form the working sample
firewall_row_ids <- sample(nrow(IntFirewallData_one_hot_df), size = 1000, replace = FALSE)
IntFirewallData_one_hot_1000_df <- IntFirewallData_one_hot_df[firewall_row_ids, ]
# Echo the sampled frame (prints all 1000 rows when knitted)
IntFirewallData_one_hot_1000_df
##       Source.Port Destination.Port NAT.Source.Port NAT.Destination.Port
## 4519        55346              443           44978                  443
## 13921       54883            51505               0                    0
## 21316       62446              445               0                    0
## 61397       53535             1393           14227                 1393
## 59091       52802            56205               0                    0
## 61449       51584              443           13301                  443
## 21342       62243              445               0                    0
## 10124       38658              443           22719                  443
## 62762       57470            51413           17602                51413
## 42177       60052            37965               0                    0
## 21317       62442              445               0                    0
## 2656        55055              445               0                    0
## 35422       55075               53           29760                   53
## 4781        57067               53           57189                   53
## 55032       55232               53            8300                   53
## 2793        28579            64147               0                    0
## 63544       62569            44847               0                    0
## 1966        41614             5222           29157                 5222
## 58168        1925               53            9734                   53
## 37824       60207               80           10851                   80
## 48250       50316               80           40877                   80
## 41867       49858              443            3076                  443
## 32808       53455               53               0                    0
## 60722       51653              445               0                    0
## 12209       49417               53           14090                   53
## 58810       60354              443           37287                  443
## 43979       27443               23               0                    0
## 36938       49152               53           17272                   53
## 14813       63932              445               0                    0
## 59979       58046               53           58046                   53
## 55479       50440               53           39485                   53
## 41314       57470             6889           19938                 6889
## 22837       20962            22114               0                    0
## 56850       34114              443           15852                  443
## 7337        58638             5900               0                    0
## 23632       61750              445               0                    0
## 16543       47034              443            4001                  443
## 61906       55892              445               0                    0
## 64959       50010              445               0                    0
## 32094       64173              445               0                    0
## 38121       57685              445               0                    0
## 48018       27005            27015           19164                27015
## 32945       59119              445               0                    0
## 50828       56644              443            7171                  443
## 15931        2991               23               0                    0
## 4392        53935            42874           31408                42874
## 62548       53064              443           10926                  443
## 28973       51959            42269           61621                42269
## 63723        2267               53            3037                   53
## 52780       50280            57470               0                    0
## 47578       55009              445               0                    0
## 22171       63118             7000           63118                 7000
## 50173       52763               53               0                    0
## 22596       53814             6725           20985                 6725
## 41014       51320            44847               0                    0
## 37644       58054              445               0                    0
## 44523       43725            54532               0                    0
## 40851       26900            27017            5951                27017
## 2252        50453              445               0                    0
## 43215       61104              445               0                    0
## 6227        41207               80           55374                   80
## 3920        56131               53           35553                   53
## 60301       16524            22114               0                    0
## 3081        55107               53           60741                   53
## 8570        57220               53           51033                   53
## 65470       51689             4765           24946                 4765
## 28805       62824            37807               0                    0
## 41110       63447            44847               0                    0
## 25385       49516              445               0                    0
## 44242       53290              443            5427                  443
## 35075        7226            50584               0                    0
## 32480       49235               53           22418                   53
## 54476       55521               53           35090                   53
## 51675       52887            37807               0                    0
## 44625       31574               53           31574                   53
## 45247       45339              443           35795                  443
## 39488       62243              445               0                    0
## 8487        51414            50584               0                    0
## 5842        64670               53           60367                   53
## 23741       58344               53           58344                   53
## 59676       40232               53           19769                   53
## 58646       48478            49903               0                    0
## 28429       59886            64147               0                    0
## 64070       49505               53           46670                   53
## 17301       55942               53           19231                   53
## 10147       41803              443            1948                  443
## 34908       63399              445               0                    0
## 13079       49929            26467               0                    0
## 25136       65313               53            7915                   53
## 53449       57028             8080           10225                 8080
## 17039       57119               53               0                    0
## 65011       55044              445               0                    0
## 30757       56152               53           29649                   53
## 32937       59141              445               0                    0
## 61594       50490              443           43169                  443
## 41164       49740              443           61794                  443
## 37252       49502               53            5291                   53
## 31019       10736            35440               0                    0
## 41787       56946              445               0                    0
## 30595       33525               53           23050                   53
## 32431       56889               53           49457                   53
## 26499       54365            64147               0                    0
## 30173       43136              443           64659                  443
## 55813       63204              443           63204                  443
## 62233       51236              445               0                    0
## 51031       58751             9571               0                    0
## 23204       55529            44847               0                    0
## 23677       49994            26467               0                    0
## 45799       51500              443           57250                  443
## 54341       24480            35440               0                    0
## 26900        1059            44847               0                    0
## 51885        5786               53            5786                   53
## 1749        30966            44847               0                    0
## 64900       57171               53           25491                   53
## 58048       65532               53           19828                   53
## 57868       52045              445               0                    0
## 29692       55066               53           26532                   53
## 23808       50544               53           44871                   53
## 8669        32914             5222           62781                 5222
## 55070       63782             6969           33183                 6969
## 4420        55122              443           57128                  443
## 4438        65220               53           17734                   53
## 13451       55227               53            7352                   53
## 20527       60377              443           19236                  443
## 11491       51979               80           40174                   80
## 2340        53928            15503               0                    0
## 42925       56582              445               0                    0
## 44740       42252            33303               0                    0
## 28485       57987             3478           61463                 3478
## 37233       64275               53           47758                   53
## 57552       53555            57861           41718                57861
## 4022        58638             5900           58638                 5900
## 27255       58276            43890               0                    0
## 40267       57403              445               0                    0
## 22745       21108            25174               0                    0
## 18627       56308               53           17371                   53
## 21567       27005            27015           38295                27015
## 14724       36628               80           54402                   80
## 63564       50848              445               0                    0
## 41864       54993              443           50582                  443
## 58136       49378               53           33691                   53
## 2761        54995              445               0                    0
## 63672       50880               80           36202                   80
## 53764       53075              445               0                    0
## 21811       58891              443           55777                  443
## 9941        52052               80           13756                   80
## 25445       56695               53           39577                   53
## 9207        15319            51505               0                    0
## 22495       50293              445               0                    0
## 60133       35509             3389           35509                 3389
## 27618       65216               53           41251                   53
## 64536       61670            57470               0                    0
## 59619       36712               53               0                    0
## 33984       63662              445               0                    0
## 46358       42225              443            6040                  443
## 36883       43604               53           41746                   53
## 54999       53093              445               0                    0
## 37217       49901               53           43057                   53
## 29937       41996             5900               0                    0
## 58791       51744             1678           56992                 1678
## 9019        59182               53           44334                   53
## 27556       61413               80            4426                   80
## 50951       27005            27015            5485                27015
## 53670       58082              445               0                    0
## 1998        18204            55160               0                    0
## 5556        51988              443           46112                  443
## 34498       55144              443            5039                  443
## 8791        39380            49903               0                    0
## 35198       37936              443           26226                  443
## 49599       64288               53           63361                   53
## 10546       50411             8080           50411                 8080
## 7832        58638             5900               0                    0
## 5724         6666              138               0                    0
## 53065       59574              443            1442                  443
## 55222       50878            53733               0                    0
## 60563        4910             2149               0                    0
## 6649        52195            50930           43224                50930
## 16201       63333              445               0                    0
## 37112       54134            64147               0                    0
## 39159       51900            15755           25086                15755
## 50280       51433              443           11382                  443
## 21664       62164              445               0                    0
## 31355       64321               53           62198                   53
## 33033       59057              445               0                    0
## 38411       51974               80           47213                   80
## 36913       51311               22           51311                   22
## 58817       30960            45673            1133                45673
## 9343        26900            27017           11369                27017
## 39610       60374              443            9623                  443
## 41479       57023              445               0                    0
## 36548       58059              445               0                    0
## 9626        61369            64147               0                    0
## 48451       51689              993           61833                  993
## 56007       57259               53           43508                   53
## 42634       53294              443           27871                  443
## 63808       52482              443            8869                  443
## 25140       55752               53            6985                   53
## 49492       56793            14902            9937                14902
## 3807        64724               53           10875                   53
## 13077       49841             1433               0                    0
## 18195       56912               53            5094                   53
## 50565       56555              443           29813                  443
## 38529       57083               53           19689                   53
## 508         52266            31225            1238                31225
## 16306       64950               53           51754                   53
## 10393       65006              445               0                    0
## 30229       64385               53           21883                   53
## 34807       60476               80           59400                   80
## 27594       57470            64430           53438                64430
## 21686       22924            25174               0                    0
## 63632       45941            23386               0                    0
## 31394       52769              443            2032                  443
## 51622       27005            27015           45197                27015
## 23887       34788              443           32056                  443
## 61223       49435              443            3627                  443
## 27278       60625              445               0                    0
## 44310       56194              445               0                    0
## 15214       63823              445               0                    0
## 49693       57310              443           42139                  443
## 9004        32911              443           37831                  443
## 63308       63732            40642           31051                40642
## 20938       49776               53           56083                   53
## 14241        9234               80           38036                   80
## 39693       52312              443           50036                  443
## 56361       43033              443           43033                  443
## 42726       10038              443           58313                  443
## 6999        58638             5900               0                    0
## 56448       52788              445               0                    0
## 51193       57160               53           20461                   53
## 10311       31126               53           31126                   53
## 53946       53428              445               0                    0
## 63100       11136            15503               0                    0
## 32192       51107               80           61498                   80
## 17834       55534               80            1439                   80
## 11840        6881            50239           58874                50239
## 18396       64146               53           63495                   53
## 36974       53720            50795           49301                50795
## 51924       27005            27018           55202                27018
## 1154        62069              443           28586                  443
## 37419       58497              443           60144                  443
## 12061       44925              443           50958                  443
## 47095       60039               80           53912                   80
## 20634       55384               53           49003                   53
## 11071       55439            37965               0                    0
## 34051       52793              443           12880                  443
## 41868       54994              443           29516                  443
## 20200       56226               53           49033                   53
## 43393       35120            26467               0                    0
## 50015        7104               23               0                    0
## 19855       62548              445               0                    0
## 19289       51979            25174               0                    0
## 59568       50084            26672               0                    0
## 20456       55435            25174               0                    0
## 51753       50878            53733               0                    0
## 56755       63305            62413               0                    0
## 1396        50764              445               0                    0
## 24703       62478              443           29633                  443
## 47162       28176            34021               0                    0
## 41100       56825              445               0                    0
## 43756       56838            35124           44520                35124
## 47463       59323               53            1369                   53
## 36910       56939               53           27310                   53
## 21523       60627              443           41376                  443
## 51106       61838            37965               0                    0
## 36874       50005               53            1672                   53
## 24069       64907               53           25739                   53
## 48118       59845              445               0                    0
## 35405       42278              443           42278                  443
## 4221        49945              445               0                    0
## 52109       29424            37807               0                    0
## 15172       22916            25174               0                    0
## 13853       64835               53           41380                   53
## 45875       60893              443            5461                  443
## 13064       58646            44847               0                    0
## 13594       64101              445               0                    0
## 18344       56752               53           41172                   53
## 38000       37965            23215           60797                23215
## 55635       52251              443           26101                  443
## 4484        10013            40024           56811                40024
## 3323        55879               53           40028                   53
## 17453       53022            25174               0                    0
## 56203       62767            57470               0                    0
## 45239       56834            46513           28288                46513
## 58768       49418            27016           27118                27016
## 33224       37653             5223           25600                 5223
## 48820       26502            64147               0                    0
## 49940       63818            19291           56455                19291
## 49139       60327               80           34206                   80
## 28207       60319              445               0                    0
## 23732       50269              443           37644                  443
## 20414       55384               53           37937                   53
## 2857        56794              443           16368                  443
## 34688       62343              443           28448                  443
## 47876       56343              443           34805                  443
## 44706       65064            30188               0                    0
## 7466        58638             5900               0                    0
## 299         52270             3708           62556                 3708
## 13880       48817            45919           48817                45919
## 41753       48125             7548               0                    0
## 8882        53614              445               0                    0
## 19941       54969               53           12796                   53
## 32088       22125               23               0                    0
## 6832        49384              445               0                    0
## 42415       56456              445               0                    0
## 65526       65323               53           33275                   53
## 2895        49929               53           57396                   53
## 58580       49996               53           62453                   53
## 25475       49390               53           24759                   53
## 65009       37837            37965               0                    0
## 10315       65343               53            9995                   53
## 53370       55028              443           55949                  443
## 35060       62293               53           18482                   53
## 60614       51698              445               0                    0
## 50106       53178            26467               0                    0
## 8385        65446              445               0                    0
## 10103       64733            35440               0                    0
## 65059       43611            61903               0                    0
## 42426       45654               67               0                    0
## 26704       50606              443           59722                  443
## 13729       59485               80           39693                   80
## 5790        56240               80           56240                   80
## 26589       60816              445               0                    0
## 16578       57442             2258           57442                 2258
## 4490        55939               53           57194                   53
## 21604       50949               80           11302                   80
## 18982       64595            64147               0                    0
## 13694       64229              445               0                    0
## 50725       59029              445               0                    0
## 39406         443            36576               0                    0
## 30599       64421               53           64542                   53
## 43405       61016              445               0                    0
## 39561       62207              445               0                    0
## 9926        54553               53           37407                   53
## 9780        65000              445               0                    0
## 52308       38630              443           20829                  443
## 9877        57149               53           34608                   53
## 26063       50629              443           47938                  443
## 53399       65392              443           55705                  443
## 9951        45057              443            6642                  443
## 56439        1027            11392               0                    0
## 62318       53128              443           36411                  443
## 50879       56966               80           60389                   80
## 36016       65264               53            8238                   53
## 22278       50211             1688            3994                 1688
## 48370       42557              443           48761                  443
## 57086       59418              443           19462                  443
## 22735       55435            25174               0                    0
## 6738        54022              445               0                    0
## 20708       64205               53           39534                   53
## 46397       49479            56205               0                    0
## 22130       61017            64147               0                    0
## 40081       64537               53           43379                   53
## 6501        63107              443           39326                  443
## 18037       63032            22114               0                    0
## 18950       50207               53           12070                   53
## 43834       55762               53           54612                   53
## 60049       55946              443           34803                  443
## 60588       51286              445               0                    0
## 62148       61705               80           47319                   80
## 3520        64568               53           24682                   53
## 21810       36889               80           51153                   80
## 44472       35078              443            8054                  443
## 37859       56496               80           21274                   80
## 29802       55019               53           59602                   53
## 33632       60438            17148           35885                17148
## 29501       63930            44330           31365                44330
## 46373       64658               53            8151                   53
## 28722       59483               23               0                    0
## 56963       65131               53            1840                   53
## 35515       64868               53           56069                   53
## 7642        58638             5900               0                    0
## 9193        65321              445               0                    0
## 50382       54586              445               0                    0
## 27418       53713              443           10339                  443
## 52448       63998            21247               0                    0
## 23072       64711               53           30437                   53
## 30147       43461            35440               0                    0
## 44119       60875              443           10095                  443
## 64169       56942              443           61847                  443
## 12619       56028               53           49252                   53
## 28265       63313              443            9713                  443
## 6073        49598              445               0                    0
## 65152       18889            50584               0                    0
## 52381       34897            50584               0                    0
## 18375       64106               53           27158                   53
## 24541       58862              443           26650                  443
## 34697       39098            51505               0                    0
## 54059       55540              443           32830                  443
## 59260       50979               80           12190                   80
## 27153       53787                1           62839                    1
## 61217       64966               53           51667                   53
## 17470       59668            50584               0                    0
## 41021       57148              445               0                    0
## 62674       53202              443           64942                  443
## 47772       49242               53            1581                   53
## 6539        46014               53           46014                   53
## 39436       62278              445               0                    0
## 49383       54908              445               0                    0
## 30540       59639              445               0                    0
## 62053       56176               53           42328                   53
## 17416       63062              445               0                    0
## 7008        58638             5900               0                    0
## 57263       26324            30188               0                    0
## 37999       50969              443           44036                  443
## 43125       50485               80           52438                   80
## 10021       65093              445               0                    0
## 5759        52077               80           35958                   80
## 3543        63464              443           19526                  443
## 42108       38852            37965               0                    0
## 10333       44894              443           39327                  443
## 22250       61933              445               0                    0
## 63362       53715              443           45128                  443
## 42107       56865              445               0                    0
## 37885       63934               53           32673                   53
## 48353       49843               53           21434                   53
## 56872       55317              443           24510                  443
## 45439       27005            32028           64745                32028
## 9708        22542            30188               0                    0
## 52345       26791              443           23053                  443
## 17467       42238            25174               0                    0
## 53809       51776            14645           62934                14645
## 49239       30179            57470               0                    0
## 43093       51870            40490           10666                40490
## 32259        8592            35993               0                    0
## 12191       49261               53           20112                   53
## 15647       61572               53           61572                   53
## 4951        55653               53           34737                   53
## 23968       49885              445               0                    0
## 19777       49765               53           55763                   53
## 59511       56620              445               0                    0
## 2995        52205            24416           50314                24416
## 5866        34980              443           26995                  443
## 28426       51453            64147               0                    0
## 3616        50131              445               0                    0
## 13391       49667               80            7772                   80
## 39370       49531               53           34898                   53
## 55447       65396               53           55502                   53
## 24869        3478            61463               0                    0
## 44665       49855               53           11171                   53
## 63753       53392               53           53392                   53
## 64667       19099            56205               0                    0
## 16094       54516               53           54516                   53
## 42002       53719              443           48486                  443
## 33910       59099              445               0                    0
## 17145       49686               53           47017                   53
## 46779       50330               80           61342                   80
## 10282       55543             3389           55543                 3389
## 67          52193              443           50924                  443
## 35695       43578            15503               0                    0
## 37981       37965            50321           10306                50321
## 13702       52450              445               0                    0
## 1194        56092               53           54654                   53
## 61358       51440              445               0                    0
## 35140       55457               53           12461                   53
## 10447       53193              445               0                    0
## 50583       27005            27015           62315                27015
## 17473       51433              445               0                    0
## 55426       49229               53           54866                   53
## 21111       50857             3389           50857                 3389
## 55223       57060            50584               0                    0
## 58596       57268              443           29567                  443
## 36957       55000               53           36007                   53
## 51457       58765              445               0                    0
## 2957        56039            50584               0                    0
## 3828        58638             5900           58638                 5900
## 23251       50089              445               0                    0
## 62058       40576              443           40576                  443
## 12622       49953               53           38496                   53
## 45778       56694               53           57041                   53
## 44347       60778              445               0                    0
## 40350       48020             7484               0                    0
## 4716        49841              445               0                    0
## 18161       56603               53           59539                   53
## 40307       47910             7464               0                    0
## 29537       54464            56205               0                    0
## 26918       49170              445               0                    0
## 15757       49694             5900               0                    0
## 63252       30960            45676           24387                45676
## 19603       56328            36237               0                    0
## 41439       35558            15503               0                    0
## 12858       54117              443           30153                  443
## 38480       65497               53           19517                   53
## 65135       61482              443           17696                  443
## 9925        56994               53           35888                   53
## 8855        40845            64147               0                    0
## 12669       49418            22114               0                    0
## 44887       55988              445               0                    0
## 58489       63128              443           30036                  443
## 2916        55725            14704            2998                14704
## 58527       63768            55755           28718                55755
## 32516       56686              443           39990                  443
## 44165       49396               53           21722                   53
## 28845       60118              445               0                    0
## 24255       55057              443            7517                  443
## 49754       59389              445               0                    0
## 10975       64808              445               0                    0
## 43106       49795               53            1877                   53
## 126         53987            16571           20059                16571
## 30358       55391               53           35560                   53
## 39835       62150              445               0                    0
## 11092       62580               23               0                    0
## 58970       49418            27016           62306                27016
## 38068       51401             6881               0                    0
## 56544       64831               53           56461                   53
## 49764       43584            24748               0                    0
## 54740       52140               80            9768                   80
## 63352       60156              443           49389                  443
## 7364        58638             5900               0                    0
## 44939       56593              443           58940                  443
## 10246       49240               53           19807                   53
## 5061        64066               80           28023                   80
## 59974       62462               53           62462                   53
## 44699       56062              445               0                    0
## 31081       64091               53           52948                   53
## 48377       65487               53           58054                   53
## 51888       53770            64147               0                    0
## 62738       53029              443           57596                  443
## 62006       59779              443           65358                  443
## 21966       53202              443           56977                  443
## 42931       58131            30228               0                    0
## 25772       64316               53            5577                   53
## 49250       53629            51413           42166                51413
## 29343       15978            50584               0                    0
## 46830       56553               53           46585                   53
## 64366       55887               53           55333                   53
## 32378       49692               53           65365                   53
## 30177       37578              443           36969                  443
## 63199       50522              445               0                    0
## 4908        56330               53           48374                   53
## 50498       30960            45701           51481                45701
## 8854        60479            25174               0                    0
## 63193       50524              445               0                    0
## 64109       35101              443           31000                  443
## 33814       14036            48817               0                    0
## 56606        3303               80           51748                   80
## 32064       26606            40108               0                    0
## 53274       52643            36237               0                    0
## 58640       50181            64147               0                    0
## 13415       49576               80            3769                   80
## 61401       56759            55201           40009                55201
## 40211       58672            43890               0                    0
## 33513       16647               53           16647                   53
## 22778       61820              445               0                    0
## 3392        60473            22170           57823                22170
## 20397       57035               53           11215                   53
## 57570       49367               53            7025                   53
## 32793       56644               80           27121                   80
## 58987       52751              443            8615                  443
## 37694       49543               80           13855                   80
## 17478       63587            22114               0                    0
## 37416       51967              443           12004                  443
## 46727       51510             7000           51510                 7000
## 59235       61000            15503               0                    0
## 46854       55497               53           48989                   53
## 16105       32866              443           34870                  443
## 47560       55027              445               0                    0
## 64789       50398               80           47950                   80
## 15460       18331               53           15455                   53
## 56809       19610             9571               0                    0
## 16910       40312            55442               0                    0
## 53389       56771               53           26360                   53
## 43035       56657              443           37201                  443
## 60235         443            12105               0                    0
## 41093       50338              443           61638                  443
## 56126       39921               80           15019                   80
## 62730       58981              443            5501                  443
## 45910       63837            14023           10553                14023
## 41809       52724            64147               0                    0
## 1267        55066              443           25317                  443
## 19261       51088              445               0                    0
## 2440        64709               53           54232                   53
## 648         61183            25174               0                    0
## 53215       58234              445               0                    0
## 8936        49275             7000           49275                 7000
## 15448       50597               80           50597                   80
## 15150       64077            25174               0                    0
## 37710       52501               53           50653                   53
## 25677       55684               53           56434                   53
## 43700       53376              443           49944                  443
## 12795        9302              443           20327                  443
## 59550       56600              445               0                    0
## 25684       64894               53            1246                   53
## 41534       61619              445               0                    0
## 59342       56468               80           52689                   80
## 47800       64074               53           30034                   53
## 45862       53478              443           15802                  443
## 38812       49319               53            1935                   53
## 34105       63914            32805           62469                32805
## 40698       49453              443           59315                  443
## 47109       64381               53           47350                   53
## 6070          443            50238               0                    0
## 4936        49869               53           64308                   53
## 48585       56455               53           54287                   53
## 16349       58944              443           12374                  443
## 34766       61802              443           61802                  443
## 30601       64081              443           44288                  443
## 25224       65398               53           64225                   53
## 40039       48706            23393               0                    0
## 21238       50265               23               0                    0
## 38457       55405               53           55361                   53
## 31129       49272               53           25138                   53
## 45840       56720               53           11601                   53
## 39498       57348              445               0                    0
## 937         52194              443           33437                  443
## 41842       61524              445               0                    0
## 10481       50052            65010               0                    0
## 9401        50488            22114               0                    0
## 38591       57601              445               0                    0
## 32514       55184               53           41873                   53
## 36438       49713               53           37729                   53
## 46934       56308               53           53406                   53
## 654         60128            22114               0                    0
## 42409       56769              445               0                    0
## 45238       58649              443           35795                  443
## 53974       57996              445               0                    0
## 20276       62490              445               0                    0
## 28562       35617              443           43217                  443
## 60239        6676            63836               0                    0
## 25939         443            11817               0                    0
## 53410       26900            27017           64029                27017
## 4500        55087               53           19901                   53
## 15016       53373            64147               0                    0
## 44394       53003            11392               0                    0
## 22466       50304              445               0                    0
## 45696       42252            33306               0                    0
## 37977       36126               53           44357                   53
## 4418        36897              443           23482                  443
## 14324       63810            27064           25460                27064
## 26789       55817               53           30944                   53
## 58895       63770            24466           46194                24466
## 19359       58911              443           55607                  443
## 55053       64791              443           54264                  443
## 23932        3928            64147               0                    0
## 18862       56202               53           15464                   53
## 33255       49935               53            2977                   53
## 4635        59977            30188               0                    0
## 57940       39611              443           31755                  443
## 40995       65317            51221               0                    0
## 17949       47910             7454               0                    0
## 51586       39423               80            5862                   80
## 50158        3181               53            3181                   53
## 42441       61338              445               0                    0
## 23657       56770            26467               0                    0
## 47732       55062               53           13920                   53
## 30165       55727               53           48303                   53
## 39466       65494               53               0                    0
## 129         64095             1976           59706                 1976
## 9309        55034              443           64856                  443
## 19029       52503            50584               0                    0
## 62043       57235               53            3428                   53
## 19374       50377               53           41134                   53
## 48164       54636            26467               0                    0
## 38602       57573              445               0                    0
## 45567       55811              445               0                    0
## 25998       49208             6881            3930                 6881
## 8097         1230             3389            1230                 3389
## 56250       14082            50584               0                    0
## 21983       42732               53           10027                   53
## 47031       55454              445               0                    0
## 22223       50386              445               0                    0
## 12560       60481            40075           20355                40075
## 3821        58638             5900           58638                 5900
## 46484       55548              445               0                    0
## 16385       37565              443           55641                  443
## 24859       49624              445               0                    0
## 35478       64813               53           46374                   53
## 24834       61399              445               0                    0
## 44816       50949              443           50949                  443
## 50037       56173            37807               0                    0
## 57070       35353              443           65162                  443
## 31616       59694            50584               0                    0
## 10860       53104              445               0                    0
## 1625        64171               53           47261                   53
## 14913       62394               80           62394                   80
## 12625       62763              443           43714                  443
## 63031       49249              443           43033                  443
## 35057       38834               53           64506                   53
## 6705        53759            42873               0                    0
## 62480       49673              443           32250                  443
## 59860       56547              445               0                    0
## 42139       56519              445               0                    0
## 43081       49935               80           29909                   80
## 59150       52139              445               0                    0
## 36476       37965            32405           17637                32405
## 43734       27005            27017           53801                27017
## 6615        52093               80            7625                   80
## 10184       45364              443           12194                  443
## 9778        65001              445               0                    0
## 20712       60226              443           32684                  443
## 13640       57300               53           43157                   53
## 59389       64494              443           46008                  443
## 5523        50547               53            5293                   53
## 29064       37737              443           61948                  443
## 27924       54744              443           54744                  443
## 14337       57073              443           48824                  443
## 48881       49905               53           29720                   53
## 50532       11133            51505               0                    0
## 55425       65357               53           38110                   53
## 11647       52140            53155            1111                53155
## 2860        43212              443           25002                  443
## 28080       60349              445               0                    0
## 29785       53301            11000            8510                11000
## 39675       63457              443            6711                  443
## 12618       50293               53           25447                   53
## 11206       56661               53           16607                   53
## 50716       59034              445               0                    0
## 24100       62074              443           62074                  443
## 7677        58638             5900               0                    0
## 17044       54959               53           17105                   53
## 41904       53147            37965               0                    0
## 60500       53540            34134           25153                34134
## 23255       61851              445               0                    0
## 15255       63639              445               0                    0
## 38270       62512              445               0                    0
## 60237       62787            51221               0                    0
## 40274       57396              445               0                    0
## 34028       65507               53           47508                   53
## 24954       57565              443           12958                  443
## 24440       49726              445               0                    0
## 4543        56317               53           26526                   53
## 8889        51533            44847               0                    0
## 26404       49348              445               0                    0
## 1332        55392              445               0                    0
## 64088       53375              443           59892                  443
## 22392        2422            62507               0                    0
## 64184       50196              445               0                    0
## 3122        64797               53           38007                   53
## 17259       49255               53           62654                   53
## 35015       50561               80           19741                   80
## 34728       58826              445               0                    0
## 29325       10480            26467               0                    0
## 56196       52480              445               0                    0
## 9522        55199               53           60419                   53
## 8004        52518            35440               0                    0
## 41413       61661              445               0                    0
## 19198       50981              443           28253                  443
## 59999       49285               53           48057                   53
## 47586       61902            22114               0                    0
## 40981       57176              445               0                    0
## 24452       61273              445               0                    0
## 6731        53928            15503               0                    0
## 13483       50023               53           26218                   53
## 60650       59411            37965               0                    0
## 30117       54084              443           63980                  443
## 21332       62247              445               0                    0
## 37240       60720            23718               0                    0
## 31307       59784              443           16677                  443
## 19466       55714               53            2036                   53
## 16111       52138              443           52138                  443
## 41387       25348               23               0                    0
## 18679       46776              443           41505                  443
## 50369       61223            37965               0                    0
## 41349       40474            23117               0                    0
## 29521       64773              445               0                    0
## 13011       64419              445               0                    0
## 7192        58638             5900               0                    0
## 45024       56345               53           18764                   53
## 65239       14641              443           14641                  443
## 25580       49464              445               0                    0
## 37836       57418               53           56760                   53
## 31840       58741              443           39394                  443
## 22142       33886            52255               0                    0
## 62123       55292               53           33154                   53
## 12840        8916              443           25420                  443
## 25670       35889              443           19640                  443
## 31825       57371               53           14687                   53
## 25728       65398               53           27490                   53
## 65151       49908              445               0                    0
## 48911       55773               53           61209                   53
## 57171       49983               53            2734                   53
## 4733        12290            22114               0                    0
## 55275       53031              445               0                    0
## 49213       47700            51505               0                    0
## 10523       53157              445               0                    0
## 20474       57737               53           57737                   53
## 23319       32959              443            7767                  443
## 37131       62767              445               0                    0
## 44281       50584            51651           64737                51651
## 34899       58807              445               0                    0
## 25149       56500               53           23471                   53
## 10468         443            14591               0                    0
## 968         54539            26467               0                    0
## 59957       51881              445               0                    0
## 26999       44628               80           44628                   80
## 4953        49629               53            5781                   53
## 16950       13235            44847               0                    0
## 43154       57878            55108               0                    0
## 10027       65078              445               0                    0
## 45632       34873               80           34873                   80
## 38163         993            18732               0                    0
## 38768       62393              445               0                    0
## 41171       49809              443           59332                  443
## 6703        49440              445               0                    0
## 39721       49632               53           26153                   53
## 61720       52788            64147               0                    0
## 29920       59840              445               0                    0
## 38698       53897              443            3893                  443
## 32774       50635              443           12337                  443
## 6876        57131            31061               0                    0
## 17938       45424               23               0                    0
## 29961       62757            44847               0                    0
## 37801       50363               53           11154                   53
## 64428       50388               53           36843                   53
## 46086       55264               53           35896                   53
## 23041       55310               53           40255                   53
## 42350       47969               53           59484                   53
## 348         52041               80           18599                   80
## 40766       33922              443            8890                  443
## 31159       33545              443           31253                  443
## 18513       55711            26467               0                    0
## 14618       65318               53           63246                   53
## 39559       53147            37965               0                    0
## 5038        56951               53            9479                   53
## 48438       49475               53           56421                   53
## 1831          443            44683               0                    0
## 35323       56794            46824               0                    0
## 37857       55226              443           46963                  443
## 7285        58638             5900               0                    0
## 16144       63385              445               0                    0
## 47434       56090               53           62587                   53
## 9231        53524              445               0                    0
## 45212       60970            20541               0                    0
## 18148       64965            27049           54387                27049
## 43671       54028            37965               0                    0
## 5536        56005               53           33914                   53
## 62174       50867              443           26907                  443
## 38306       55222              443           38278                  443
## 40841       54000               53           38105                   53
## 60676       51661              445               0                    0
## 10502       64941              445               0                    0
## 37436       59966              443           53241                  443
## 4795        50336               53           11367                   53
## 6256        36665              443           51014                  443
## 37313       65395               53           34335                   53
## 50684       59050              445               0                    0
## 34386       50710               80           11292                   80
## 52359       29744               53           29744                   53
## 9890        64932               53            8604                   53
## 65303       50946               80           36199                   80
## 20505       56161               53            2092                   53
## 31820       53746             7546           50898                 7546
## 40972       56872              445               0                    0
## 37810       37009               53           17382                   53
## 54407       59168            39004               0                    0
## 28776       60167              445               0                    0
## 30685       63913            24806            2171                24806
## 39729       49349               80           49349                   80
## 64756       50931               80           59351                   80
## 2159        56904               80           52022                   80
## 43849       58259               53           23484                   53
## 43733       27005            27019           64712                27019
## 55828       34166              443           49916                  443
## 39333       57163               53           45195                   53
## 42519       31995            61689               0                    0
## 21231       54188              443            5906                  443
## 4956        56941               53           56569                   53
## 42738       56981               80           19891                   80
## 12737        9307              443            6375                  443
## 17356       63089              445               0                    0
## 21136       64947               53           11296                   53
## 56176       56962               53           40705                   53
## 47194       39029               53           55192                   53
## 42498       56387              445               0                    0
## 47156       64582               53           25024                   53
## 25443         123              123           34455                  123
## 50638       55041               80           12804                   80
## 36705       56275               53           15148                   53
## 11590       16620            25174               0                    0
## 48757       39039              443            4727                  443
## 10712       64270               53           47275                   53
## 29445       58779              443           44782                  443
## 54553       53419            28189               0                    0
## 56357       49197               53           27457                   53
## 29759       52572               80           52572                   80
## 40385       51956              443           47122                  443
## 60274       20961            35440               0                    0
## 38239       57937              445               0                    0
## 44772       45682                1               0                    0
## 46155       55937              443           21508                  443
## 60110       43484              443           55073                  443
## 44915       53256              443           59014                  443
## 22420       62105              445               0                    0
## 46789       56667               53           35653                   53
## 62768       50699              445               0                    0
## 41684       51221            62348           26234                62348
## 11564       48676            53395               0                    0
## 38847       56922               53           56922                   53
## 21965        1100            27540           45452                27540
## 37530       11478            51221               0                    0
## 16743       57387               53           39674                   53
## 41515       27452             5222           13129                 5222
## 543         55563               53           42353                   53
## 3988        65260               53           63253                   53
## 57777       37580              443            1520                  443
## 25542       43461            35440               0                    0
## 58279       38645            51221               0                    0
## 35155       52426              443           50838                  443
## 5340        58036              443           28433                  443
## 27236       62334              443           24621                  443
## 44362       20198            17372               0                    0
## 32797       49876               53           42623                   53
## 52902       53899              443           60991                  443
## 42171        2828            35253               0                    0
## 3124        57063               53           54503                   53
## 12614       65481               53           28484                   53
## 65506       35608              443           62915                  443
## 18576       13235            44847               0                    0
## 53355       65384               53           58692                   53
## 42124       17010            57470               0                    0
## 38205       56046               53            4218                   53
## 31562        3978            35440               0                    0
## 56730       56307            52700               0                    0
## 1160        42441              123           34753                  123
## 46391       18650            23718               0                    0
## 7486        58638             5900               0                    0
## 54079       61464               80           54948                   80
## 5158        12338            64147               0                    0
## 35908       63104              445               0                    0
## 37132       62766              445               0                    0
## 2820        50269              445               0                    0
## 13283       49874               53           61806                   53
## 23624       57073            57470               0                    0
## 56373        5588             5588               0                    0
## 57240       53561            35885            5770                35885
## 59170       51688              445               0                    0
## 21379        6036            51505               0                    0
## 5372        57319               53           22543                   53
## 33558       46746              443           46746                  443
## 5211        49862              445               0                    0
## 48266       49263               53           22784                   53
## 5001        50578               80           35081                   80
## 53703       53102              443           30366                  443
## 2760        43155            35440               0                    0
## 42501       56709              445               0                    0
## 13955       64030              445               0                    0
## 38479       51977               80           44699                   80
## 2007        64878            26467               0                    0
## 826         37419              443           55901                  443
## 52165       58533              445               0                    0
## 55552       57094               53           10369                   53
## 51633       51221            47391           14393                47391
## 37398       49933               53            6937                   53
## 45954       55725              445               0                    0
## 57835        3521             6881               0                    0
## 5349        53932            27258           53010                27258
## 22697       45510              443           22832                  443
## 56202       52475              445               0                    0
## 13350       50827               80            8180                   80
## 8258        18657               53           18657                   53
## 48253       58077            36653           53473                36653
## 20797       56590            25174               0                    0
## 248         55658              445               0                    0
## 26891       49182              445               0                    0
## 63302       55535               53           24278                   53
## 4057        59103              443           44683                  443
## 29021       51985               80           53937                   80
## 26932       48706            23393               0                    0
## 22840       61779              445               0                    0
## 6230        35797               53           35797                   53
## 41527       61192            35440               0                    0
## 19558       51221             1950           25491                 1950
## 30532       59654              445               0                    0
## 30065        9130               80           10198                   80
## 44966       56572              443           48876                  443
## 49462       53142              443           26091                  443
## 31542       64307              445               0                    0
## 20933       57089              443           54231                  443
## 12377       42490             1433               0                    0
## 16459       27995            22114               0                    0
## 16917           0                0               0                    0
## 48849       15930               53           15930                   53
## 58835       52215              445               0                    0
## 33042       59051              445               0                    0
## 40843       46189              443           46189                  443
## 7860        58638             5900               0                    0
## 5928        56687               53            1087                   53
## 61501       56236              443           26673                  443
## 63685       49885              443           30250                  443
## 14451       63861              445               0                    0
## 8193        51118               80           35500                   80
## 63312       48015              443           20608                  443
## 40372       56413            51221               0                    0
## 59106       56754              445               0                    0
## 57863        1284            50584               0                    0
## 5425        49776              445               0                    0
## 18274       60422              443           51829                  443
## 9576        52161            19897           63116                19897
## 45003       51836            37753           57785                37753
## 61281       62780            35440               0                    0
## 55213       63547            37965               0                    0
## 42753       57791              443           18201                  443
## 5847        49561              445               0                    0
## 10623       53132              445               0                    0
## 7664        58638             5900               0                    0
## 3982        64854               53           54158                   53
## 62328       53115              443           63976                  443
## 7794        58638             5900               0                    0
## 54274       49757               53           31813                   53
## 11335        9486              443           47731                  443
## 22825       61790              445               0                    0
##       Actionallow Actiondeny Actiondrop Actionreset.both    Bytes Bytes.Sent
## 4519            1          0          0                0    13181       6041
## 13921           0          1          0                0       66         66
## 21316           0          0          1                0       66         66
## 61397           1          0          0                0       70         70
## 59091           0          1          0                0       62         62
## 61449           1          0          0                0    12641       5741
## 21342           0          0          1                0       70         70
## 10124           1          0          0                0     1294        864
## 62762           1          0          0                0      330        330
## 42177           0          1          0                0       66         66
## 21317           0          0          1                0       66         66
## 2656            0          0          1                0       70         70
## 35422           1          0          0                0      178         91
## 4781            1          0          0                0      182         93
## 55032           1          0          0                0      177         94
## 2793            0          1          0                0       62         62
## 63544           0          1          0                0       66         66
## 1966            1          0          0                0      768        510
## 58168           1          0          0                0      210         78
## 37824           1          0          0                0     3596       2562
## 48250           1          0          0                0     2062        872
## 41867           1          0          0                0    16232       3319
## 32808           1          0          0                0       82         82
## 60722           0          0          1                0       66         66
## 12209           1          0          0                0      211        110
## 58810           1          0          0                0    10223       3933
## 43979           0          1          0                0       60         60
## 36938           1          0          0                0      307        105
## 14813           0          0          1                0       66         66
## 59979           1          0          0                0      231         88
## 55479           1          0          0                0      216        110
## 41314           1          0          0                0      546        166
## 22837           0          1          0                0       62         62
## 56850           1          0          0                0     6206       1695
## 7337            0          1          0                0       62         62
## 23632           0          0          1                0       66         66
## 16543           1          0          0                0     4531       3268
## 61906           0          0          1                0       70         70
## 64959           0          0          1                0       70         70
## 32094           0          0          1                0       70         70
## 38121           0          0          1                0       70         70
## 48018           1          0          0                0      871        126
## 32945           0          0          1                0       70         70
## 50828           1          0          0                0    12924       3689
## 15931           0          1          0                0       60         60
## 4392            1          0          0                0     1973       1021
## 62548           1          0          0                0     8095       2586
## 28973           1          0          0                0       70         70
## 63723           1          0          0                0      826        826
## 52780           0          1          0                0       78         78
## 47578           0          0          1                0       70         70
## 22171           1          0          0                0     3310       1535
## 50173           1          0          0                0      296         87
## 22596           1          0          0                0       70         70
## 41014           0          1          0                0       66         66
## 37644           0          0          1                0       66         66
## 44523           0          1          0                0      109        109
## 40851           1          0          0                0      422        164
## 2252            0          0          1                0       70         70
## 43215           0          0          1                0       70         70
## 6227            1          0          0                0    79441       3092
## 3920            1          0          0                0      199         94
## 60301           0          1          0                0       62         62
## 3081            1          0          0                0      184         94
## 8570            1          0          0                0      701         88
## 65470           1          0          0                0       66         66
## 28805           0          1          0                0       66         66
## 41110           0          1          0                0       62         62
## 25385           0          0          1                0       70         70
## 44242           1          0          0                0      494        288
## 35075           0          1          0                0      146        146
## 32480           1          0          0                0      172         94
## 54476           1          0          0                0      701         88
## 51675           0          1          0                0       66         66
## 44625           1          0          0                0      213         90
## 45247           1          0          0                0     2312       1382
## 39488           0          0          1                0       70         70
## 8487            0          1          0                0       62         62
## 5842            1          0          0                0      193        102
## 23741           1          0          0                0      847        110
## 59676           1          0          0                0      205         86
## 58646           0          1          0                0       62         62
## 28429           0          1          0                0       72         72
## 64070           1          0          0                0      232        110
## 17301           1          0          0                0      197        102
## 10147           1          0          0                0    11804       4009
## 34908           0          0          1                0       70         70
## 13079           0          1          0                0       66         66
## 25136           1          0          0                0      166         86
## 53449           1          0          0                0       78         78
## 17039           1          0          0                0      318         95
## 65011           0          0          1                0       70         70
## 30757           1          0          0                0      195        102
## 32937           0          0          1                0       70         70
## 61594           1          0          0                0     1353        903
## 41164           1          0          0                0     7554       2281
## 37252           1          0          0                0      193        102
## 31019           0          1          0                0       66         66
## 41787           0          0          1                0       66         66
## 30595           1          0          0                0      168         78
## 32431           1          0          0                0      211        110
## 26499           0          1          0                0       62         62
## 30173           1          0          0                0     9329       2900
## 55813           1          0          0                0     9340       4256
## 62233           0          0          1                0       66         66
## 51031           0          1          0                0       62         62
## 23204           0          1          0                0       66         66
## 23677           0          1          0                0       66         66
## 45799           1          0          0                0     7211        823
## 54341           0          1          0                0       62         62
## 26900           0          1          0                0       62         62
## 51885           1          0          0                0      213         87
## 1749            0          1          0                0      146        146
## 64900           1          0          0                0      214        102
## 58048           1          0          0                0      819        102
## 57868           0          0          1                0       70         70
## 29692           1          0          0                0      815         95
## 23808           1          0          0                0      969         86
## 8669            1          0          0                0     8702       4668
## 55070           1          0          0                0       66         66
## 4420            1          0          0                0    17936      14273
## 4438            1          0          0                0      303         94
## 13451           1          0          0                0      183        102
## 20527           1          0          0                0    10101       2782
## 11491           1          0          0                0      366        240
## 2340            0          1          0                0       62         62
## 42925           0          0          1                0       66         66
## 44740           0          1          0                0       60         60
## 28485           1          0          0                0    60483      25857
## 37233           1          0          0                0      192         90
## 57552           1          0          0                0      492        306
## 4022            1          0          0                0       66         66
## 27255           0          1          0                0       66         66
## 40267           0          0          1                0       66         66
## 22745           0          1          0                0       62         62
## 18627           1          0          0                0      814        110
## 21567           1          0          0                0      874        126
## 14724           1          0          0                0       78         78
## 63564           0          0          1                0       66         66
## 41864           1          0          0                0    38985       4250
## 58136           1          0          0                0      179         86
## 2761            0          0          1                0       70         70
## 63672           1          0          0                0   395978      13168
## 53764           0          0          1                0       70         70
## 21811           1          0          0                0     7641       5165
## 9941            1          0          0                0     2308       1541
## 25445           1          0          0                0       93         93
## 9207            0          1          0                0       62         62
## 22495           0          0          1                0       70         70
## 60133           1          0          0                0     4224       2055
## 27618           1          0          0                0      292         94
## 64536           0          1          0                0       74         74
## 59619           1          0          0                0      138         69
## 33984           0          0          1                0       70         70
## 46358           1          0          0                0     8962       1601
## 36883           1          0          0                0      168         78
## 54999           0          0          1                0       66         66
## 37217           1          0          0                0      752         93
## 29937           0          1          0                0       60         60
## 58791           1          0          0                0       66         66
## 9019            1          0          0                0      260         86
## 27556           1          0          0                0   283970      23419
## 50951           1          0          0                0      889        126
## 53670           0          0          1                0       70         70
## 1998            0          1          0                0      146        146
## 5556            1          0          0                0    10492       3167
## 34498           1          0          0                0     3522        869
## 8791            0          1          0                0      146        146
## 35198           1          0          0                0     5463       1378
## 49599           1          0          0                0      229        110
## 10546           1          0          0                0     1626        798
## 7832            0          1          0                0       62         62
## 5724            0          1          0                0       60         60
## 53065           1          0          0                0  2582500      86854
## 55222           0          1          0                0       66         66
## 60563           0          1          0                0       66         66
## 6649            1          0          0                0       70         70
## 16201           0          0          1                0       70         70
## 37112           0          1          0                0       66         66
## 39159           1          0          0                0      316        136
## 50280           1          0          0                0      194         70
## 21664           0          0          1                0       70         70
## 31355           1          0          0                0      195        102
## 33033           0          0          1                0       70         70
## 38411           1          0          0                0    38272       2881
## 36913           1          0          0                0     5624       2099
## 58817           1          0          0                0      462        462
## 9343            1          0          0                0      422        164
## 39610           1          0          0                0      130         70
## 41479           0          0          1                0       66         66
## 36548           0          0          1                0       70         70
## 9626            0          1          0                0       62         62
## 48451           1          0          0                0    11580       4032
## 56007           1          0          0                0      199        102
## 42634           1          0          0                0     7740       4267
## 63808           1          0          0                0    26154       6874
## 25140           1          0          0                0      200        102
## 49492           1          0          0                0      264         70
## 3807            1          0          0                0      190         97
## 13077           0          1          0                0       60         60
## 18195           1          0          0                0      199        102
## 50565           1          0          0                0     6308        829
## 38529           1          0          0                0      198        102
## 508             1          0          0                0       70         70
## 16306           1          0          0                0      728        102
## 10393           0          0          1                0       66         66
## 30229           1          0          0                0      307        105
## 34807           1          0          0                0      613        539
## 27594           1          0          0                0     1386       1386
## 21686           0          1          0                0       62         62
## 63632           0          1          0                0       60         60
## 31394           1          0          0                0     8083       1790
## 51622           1          0          0                0      881        126
## 23887           1          0          0                0     8230       2275
## 61223           1          0          0                0    10377       3140
## 27278           0          0          1                0       70         70
## 44310           0          0          1                0       66         66
## 15214           0          0          1                0       66         66
## 49693           1          0          0                0      134         60
## 9004            1          0          0                0   970703      43045
## 63308           1          0          0                0       66         66
## 20938           1          0          0                0      180         92
## 14241           1          0          0                0      318        192
## 39693           1          0          0                0  1574859      55851
## 56361           1          0          0                0    10239       2017
## 42726           1          0          0                0     7481       2070
## 6999            0          1          0                0       62         62
## 56448           0          0          1                0       66         66
## 51193           1          0          0                0      255        110
## 10311           1          0          0                0      240         98
## 53946           0          0          1                0       66         66
## 63100           0          1          0                0      145        145
## 32192           1          0          0                0      366        240
## 17834           1          0          0                0     3335       2314
## 11840           1          0          0                0     3514       1874
## 18396           1          0          0                0      197        102
## 36974           1          0          0                0      320        140
## 51924           1          0          0                0      881        126
## 1154            1          0          0                0    38455       5252
## 37419           1          0          0                0     1640        831
## 12061           1          0          0                0      153         93
## 47095           1          0          0                0     3431       2292
## 20634           1          0          0                0      194        102
## 11071           0          1          0                0       66         66
## 34051           1          0          0                0      130         70
## 41868           1          0          0                0    13580       3864
## 20200           1          0          0                0      200        102
## 43393           0          1          0                0       62         62
## 50015           0          1          0                0       60         60
## 19855           0          0          1                0       70         70
## 19289           0          1          0                0       66         66
## 59568           0          1          0                0       62         62
## 20456           0          1          0                0       62         62
## 51753           0          1          0                0       62         62
## 56755           0          1          0                0       66         66
## 1396            0          0          1                0       66         66
## 24703           1          0          0                0    10391       7059
## 47162           1          0          0                0       66         66
## 41100           0          0          1                0       70         70
## 43756           1          0          0                0       70         70
## 47463           1          0          0                0      260         86
## 36910           1          0          0                0      216        110
## 21523           1          0          0                0     5663       1711
## 51106           0          1          0                0       66         66
## 36874           1          0          0                0      199        102
## 24069           1          0          0                0      168         86
## 48118           0          0          1                0       70         70
## 35405           1          0          0                0     1916       1121
## 4221            0          0          1                0       70         70
## 52109           0          1          0                0       62         62
## 15172           0          1          0                0      146        146
## 13853           1          0          0                0      179         94
## 45875           1          0          0                0    46690       9390
## 13064           0          1          0                0       66         66
## 13594           0          0          1                0       70         70
## 18344           1          0          0                0      183         94
## 38000           1          0          0                0      537        158
## 55635           1          0          0                0 23328780     730794
## 4484            1          0          0                0      156         94
## 3323            1          0          0                0      184         94
## 17453           0          1          0                0       62         62
## 56203           0          1          0                0       74         74
## 45239           1          0          0                0       70         70
## 58768           1          0          0                0      231         71
## 33224           1          0          0                0      142        142
## 48820           0          1          0                0       62         62
## 49940           1          0          0                0       66         66
## 49139           1          0          0                0     2125       1159
## 28207           0          0          1                0       70         70
## 23732           1          0          0                0     8272       2455
## 20414           1          0          0                0      188         96
## 2857            1          0          0                0     8073       2053
## 34688           1          0          0                0    13709       1675
## 47876           1          0          0                0      350        210
## 44706           0          1          0                0       66         66
## 7466            0          1          0                0       62         62
## 299             1          0          0                0      316        136
## 13880           1          0          0                0      334        158
## 41753           0          1          0                0       60         60
## 8882            0          0          1                0       70         70
## 19941           1          0          0                0      199        102
## 32088           0          1          0                0       60         60
## 6832            0          0          1                0       66         66
## 42415           0          0          1                0       70         70
## 65526           1          0          0                0      356        118
## 2895            1          0          0                0      212        108
## 58580           1          0          0                0      183         94
## 25475           1          0          0                0      204        104
## 65009           0          1          0                0       62         62
## 10315           1          0          0                0      197        102
## 53370           1          0          0                0     6015       3964
## 35060           1          0          0                0      232         88
## 60614           0          0          1                0       66         66
## 50106           0          1          0                0       62         62
## 8385            0          0          1                0       70         70
## 10103           0          1          0                0       62         62
## 65059           0          1          0                0      148        148
## 42426           0          1          0                0       60         60
## 26704           1          0          0                0     1511        849
## 13729           1          0          0                0      882        527
## 5790            1          0          0                0   101524       1673
## 26589           0          0          1                0       70         70
## 16578           1          0          0                0      156        156
## 4490            1          0          0                0      193        102
## 21604           1          0          0                0      942        527
## 18982           0          1          0                0       66         66
## 13694           0          0          1                0       66         66
## 50725           0          0          1                0       70         70
## 39406           0          1          0                0       60         60
## 30599           1          0          0                0      172         88
## 43405           0          0          1                0       70         70
## 39561           0          0          1                0       70         70
## 9926            1          0          0                0      102        102
## 9780            0          0          1                0       70         70
## 52308           1          0          0                0     9277       3025
## 9877            1          0          0                0      382         94
## 26063           1          0          0                0     6553       1034
## 53399           1          0          0                0     5939        772
## 9951            1          0          0                0     7536       1915
## 56439           0          1          0                0       62         62
## 62318           1          0          0                0    72734      14614
## 50879           1          0          0                0      214        140
## 36016           1          0          0                0      256         88
## 22278           1          0          0                0       70         70
## 48370           1          0          0                0   123082       8734
## 57086           1          0          0                0     7222       1633
## 22735           0          1          0                0      146        146
## 6738            0          0          1                0       70         70
## 20708           1          0          0                0      327         88
## 46397           0          1          0                0       62         62
## 22130           0          1          0                0       66         66
## 40081           1          0          0                0      251        102
## 6501            1          0          0                0     3272        800
## 18037           0          1          0                0       66         66
## 18950           1          0          0                0      189        102
## 43834           1          0          0                0      184        102
## 60049           1          0          0                0     9274       5282
## 60588           0          0          1                0       70         70
## 62148           1          0          0                0      134         60
## 3520            1          0          0                0      388        134
## 21810           1          0          0                0     1584       1130
## 44472           1          0          0                0      234        234
## 37859           1          0          0                0   458684      19849
## 29802           1          0          0                0      299        110
## 33632           1          0          0                0       70         70
## 29501           1          0          0                0       70         70
## 46373           1          0          0                0      195        102
## 28722           0          1          0                0       60         60
## 56963           1          0          0                0      168         86
## 35515           1          0          0                0      216        102
## 7642            0          1          0                0       62         62
## 9193            0          0          1                0       66         66
## 50382           0          0          1                0       66         66
## 27418           1          0          0                0    33323       3349
## 52448           0          1          0                0       66         66
## 23072           1          0          0                0      179         86
## 30147           0          1          0                0      146        146
## 44119           1          0          0                0     1786        172
## 64169           1          0          0                0      494        420
## 12619           1          0          0                0      770        102
## 28265           1          0          0                0     1786        172
## 6073            0          0          1                0       66         66
## 65152           0          1          0                0      146        146
## 52381           0          1          0                0       62         62
## 18375           1          0          0                0      195        102
## 24541           1          0          0                0     5167        700
## 34697           0          1          0                0      146        146
## 54059           1          0          0                0    10458       5272
## 59260           1          0          0                0   576971       8128
## 27153           1          0          0                0      320        140
## 61217           1          0          0                0      458        188
## 17470           0          1          0                0       66         66
## 41021           0          0          1                0       66         66
## 62674           1          0          0                0     3655       2344
## 47772           1          0          0                0      167         86
## 6539            1          0          0                0      414        268
## 39436           0          0          1                0       70         70
## 49383           0          0          1                0       66         66
## 30540           0          0          1                0       70         70
## 62053           1          0          0                0      194        102
## 17416           0          0          1                0       70         70
## 7008            0          1          0                0       62         62
## 57263           0          1          0                0       62         62
## 37999           1          0          0                0     7221       2265
## 43125           1          0          0                0      318        192
## 10021           0          0          1                0       66         66
## 5759            1          0          0                0      366        240
## 3543            1          0          0                0     8543       1114
## 42108           0          1          0                0       62         62
## 10333           1          0          0                0     1849       1130
## 22250           0          0          1                0       70         70
## 63362           1          0          0                0       70         70
## 42107           0          0          1                0       66         66
## 37885           1          0          0                0      209        110
## 48353           1          0          0                0      193        102
## 56872           1          0          0                0     5692       1239
## 45439           1          0          0                0      900        126
## 9708            0          1          0                0       62         62
## 52345           1          0          0                0    21391      16334
## 17467           0          1          0                0       62         62
## 53809           1          0          0                0       70         70
## 49239           0          1          0                0       60         60
## 43093           1          0          0                0       70         70
## 32259           0          1          0                0       66         66
## 12191           1          0          0                0      453        186
## 15647           1          0          0                0      213         79
## 4951            1          0          0                0      199        102
## 23968           0          0          1                0       70         70
## 19777           1          0          0                0      188         96
## 59511           0          0          1                0       70         70
## 2995            1          0          0                0      136        136
## 5866            1          0          0                0     8211       2165
## 28426           0          1          0                0       66         66
## 3616            0          0          1                0       70         70
## 13391           1          0          0                0    27534       4859
## 39370           1          0          0                0      183         94
## 55447           1          0          0                0      186        102
## 24869           0          1          0                0     1345       1345
## 44665           1          0          0                0      290        102
## 63753           1          0          0                0      832        106
## 64667           0          1          0                0       62         62
## 16094           1          0          0                0      235         90
## 42002           1          0          0                0       70         70
## 33910           0          0          1                0       66         66
## 17145           1          0          0                0      183         94
## 46779           1          0          0                0      736        670
## 10282           1          0          0                0     3488       1774
## 67              1          0          0                0    13948       1150
## 35695           0          1          0                0       74         74
## 37981           1          0          0                0      158        158
## 13702           0          0          1                0       70         70
## 1194            1          0          0                0      177         94
## 61358           0          0          1                0       66         66
## 35140           1          0          0                0      177         94
## 10447           0          0          1                0       70         70
## 50583           1          0          0                0     1006        252
## 17473           0          0          1                0       70         70
## 55426           1          0          0                0      178         94
## 21111           1          0          0                0     3613       1594
## 55223           0          1          0                0       62         62
## 58596           1          0          0                0    11546       3604
## 36957           1          0          0                0      193        102
## 51457           0          0          1                0       70         70
## 2957            0          1          0                0       66         66
## 3828            1          0          0                0       62         62
## 23251           0          0          1                0       70         70
## 62058           1          0          0                0     4433       1526
## 12622           1          0          0                0      757         98
## 45778           1          0          0                0      196        102
## 44347           0          0          1                0       70         70
## 40350           0          1          0                0       60         60
## 4716            0          0          1                0       70         70
## 18161           1          0          0                0      168         86
## 40307           0          1          0                0       60         60
## 29537           0          1          0                0       66         66
## 26918           0          0          1                0       70         70
## 15757           0          1          0                0       60         60
## 63252           1          0          0                0      396        396
## 19603           0          1          0                0       66         66
## 41439           0          1          0                0       66         66
## 12858           1          0          0                0   661695      28430
## 38480           1          0          0                0      490        111
## 65135           1          0          0                0    49181       3553
## 9925            1          0          0                0      177         94
## 8855            0          1          0                0       62         62
## 12669           0          1          0                0       66         66
## 44887           0          0          1                0       66         66
## 58489           1          0          0                0     4994        379
## 2916            1          0          0                0       70         70
## 58527           1          0          0                0       66         66
## 32516           1          0          0                0     7941       1952
## 44165           1          0          0                0      177         94
## 28845           0          0          1                0       70         70
## 24255           1          0          0                0     1364        940
## 49754           0          0          1                0       70         70
## 10975           0          0          1                0       66         66
## 43106           1          0          0                0      211        102
## 126             1          0          0                0       70         70
## 30358           1          0          0                0      210        110
## 39835           0          0          1                0       70         70
## 11092           0          1          0                0       60         60
## 58970           1          0          0                0      261         78
## 38068           0          1          0                0       66         66
## 56544           1          0          0                0      216        110
## 49764           0          1          0                0      156        156
## 54740           1          0          0                0     3476       1433
## 63352           1          0          0                0      178        118
## 7364            0          1          0                0       62         62
## 44939           1          0          0                0     1165        662
## 10246           1          0          0                0      330        101
## 5061            1          0          0                0      314        192
## 59974           1          0          0                0      238         99
## 44699           0          0          1                0       66         66
## 31081           1          0          0                0      172         88
## 48377           1          0          0                0      184         94
## 51888           0          1          0                0       66         66
## 62738           1          0          0                0     9246       1808
## 62006           1          0          0                0    18360       1734
## 21966           1          0          0                0    23323       1589
## 42931           0          1          0                0      146        146
## 25772           1          0          0                0      184         94
## 49250           1          0          0                0       70         70
## 29343           0          1          0                0      146        146
## 46830           1          0          0                0      199        102
## 64366           1          0          0                0      212         94
## 32378           1          0          0                0      212        110
## 30177           1          0          0                0     7534       2392
## 63199           0          0          1                0       70         70
## 4908            1          0          0                0      271         95
## 50498           1          0          0                0      330        330
## 8854            0          1          0                0       62         62
## 63193           0          0          1                0       70         70
## 64109           1          0          0                0  4988930     161289
## 33814           0          1          0                0       66         66
## 56606           1          0          0                0     2338       1419
## 32064           0          1          0                0      145        145
## 53274           0          1          0                0       66         66
## 58640           0          1          0                0       66         66
## 13415           1          0          0                0      318        192
## 61401           1          0          0                0       66         66
## 40211           0          1          0                0       66         66
## 33513           1          0          0                0      233         91
## 22778           0          0          1                0       70         70
## 3392            1          0          0                0     1599        743
## 20397           1          0          0                0     1414         95
## 57570           1          0          0                0      548        112
## 32793           1          0          0                0      234        234
## 58987           1          0          0                0     5378        984
## 37694           1          0          0                0     1548        847
## 17478           0          1          0                0       62         62
## 37416           1          0          0                0    21063      13499
## 46727           1          0          0                0     3490       1923
## 59235           0          1          0                0      146        146
## 46854           1          0          0                0      168         86
## 16105           1          0          0                0    23467      16522
## 47560           0          0          1                0       70         70
## 64789           1          0          0                0      318        192
## 15460           1          0          0                0      376         94
## 56809           0          1          0                0       62         62
## 16910           0          1          0                0       62         62
## 53389           1          0          0                0      193        102
## 43035           1          0          0                0     1342        918
## 60235           0          1          0                0       66         66
## 41093           1          0          0                0     6441       1651
## 56126           1          0          0                0     1151        711
## 62730           1          0          0                0     3155        836
## 45910           1          0          0                0       66         66
## 41809           0          1          0                0       62         62
## 1267            1          0          0                0    12715       3716
## 19261           0          0          1                0       70         70
## 2440            1          0          0                0      338        110
## 648             0          1          0                0       62         62
## 53215           0          0          1                0       70         70
## 8936            1          0          0                0     2163        676
## 15448           1          0          0                0    16296       9143
## 15150           0          1          0                0       66         66
## 37710           1          0          0                0      201         80
## 25677           1          0          0                0      193        102
## 43700           1          0          0                0     4814       1207
## 12795           1          0          0                0     4216        771
## 59550           0          0          1                0       70         70
## 25684           1          0          0                0      550        113
## 41534           0          0          1                0       70         70
## 59342           1          0          0                0      214        140
## 47800           1          0          0                0      209        110
## 45862           1          0          0                0     7692       2183
## 38812           1          0          0                0      166         86
## 34105           1          0          0                0       66         66
## 40698           1          0          0                0     8251       2124
## 47109           1          0          0                0      196        100
## 6070            0          1          0                0       99         99
## 4936            1          0          0                0      209        110
## 48585           1          0          0                0      183         94
## 16349           1          0          0                0     5166        700
## 34766           1          0          0                0     4521       1614
## 30601           1          0          0                0     7280       2277
## 25224           1          0          0                0      214        110
## 40039           0          1          0                0       60         60
## 21238           0          1          0                0       60         60
## 38457           1          0          0                0      199        102
## 31129           1          0          0                0      168         86
## 45840           1          0          0                0      177         94
## 39498           0          0          1                0       70         70
## 937             1          0          0                0     9075       3727
## 41842           0          0          1                0       70         70
## 10481           0          1          0                0       60         60
## 9401            0          1          0                0       66         66
## 38591           0          0          1                0       70         70
## 32514           1          0          0                0      183         94
## 36438           1          0          0                0      177         94
## 46934           1          0          0                0      700        282
## 654             0          1          0                0       62         62
## 42409           0          0          1                0       66         66
## 45238           1          0          0                0     7701       2215
## 53974           0          0          1                0       70         70
## 20276           0          0          1                0       70         70
## 28562           1          0          0                0     2981       1719
## 60239           0          1          0                0      145        145
## 25939           0          1          0                0       74         74
## 53410           1          0          0                0      422        164
## 4500            1          0          0                0      183         94
## 15016           0          1          0                0       66         66
## 44394           0          1          0                0       66         66
## 22466           0          0          1                0       70         70
## 45696           0          1          0                0       60         60
## 37977           1          0          0                0      326         78
## 4418            1          0          0                0     6351       4202
## 14324           1          0          0                0     1778       1386
## 26789           1          0          0                0      392        102
## 58895           1          0          0                0      312        132
## 19359           1          0          0                0     5168        700
## 55053           1          0          0                0     8580       5765
## 23932           0          1          0                0       62         62
## 18862           1          0          0                0      196        102
## 33255           1          0          0                0      370         94
## 4635            0          1          0                0       66         66
## 57940           1          0          0                0      234        234
## 40995           0          1          0                0       78         78
## 17949           0          1          0                0       60         60
## 51586           1          0          0                0      234        234
## 50158           1          0          0                0      214         97
## 42441           0          0          1                0       70         70
## 23657           0          1          0                0       66         66
## 47732           1          0          0                0      733         86
## 30165           1          0          0                0      177         94
## 39466           0          1          0                0       88         88
## 129             1          0          0                0       70         70
## 9309            1          0          0                0    45487       4986
## 19029           0          1          0                0       66         66
## 62043           1          0          0                0      212        102
## 19374           1          0          0                0      214        110
## 48164           0          1          0                0       66         66
## 38602           0          0          1                0       70         70
## 45567           0          0          1                0       66         66
## 25998           1          0          0                0       70         70
## 8097            1          0          0                0     3910       1805
## 56250           0          1          0                0       62         62
## 21983           1          0          0                0       86         86
## 47031           0          0          1                0       66         66
## 22223           0          0          1                0       70         70
## 12560           1          0          0                0       70         70
## 3821            1          0          0                0       62         62
## 46484           0          0          1                0       66         66
## 16385           1          0          0                0     4898       1119
## 24859           0          0          1                0       70         70
## 35478           1          0          0                0      177         94
## 24834           0          0          1                0       66         66
## 44816           1          0          0                0     4453       1546
## 50037           0          1          0                0       62         62
## 57070           1          0          0                0     1833        991
## 31616           0          1          0                0       62         62
## 10860           0          0          1                0       70         70
## 1625            1          0          0                0      179         94
## 14913           1          0          0                0    29401      10036
## 12625           1          0          0                0     7755       4481
## 63031           1          0          0                0    35406      17785
## 35057           1          0          0                0      168         78
## 6705            0          1          0                0       74         74
## 62480           1          0          0                0     6404        829
## 59860           0          0          1                0       70         70
## 42139           0          0          1                0       70         70
## 43081           1          0          0                0     1130        613
## 59150           0          0          1                0       66         66
## 36476           1          0          0                0      766        248
## 43734           1          0          0                0     1634        126
## 6615            1          0          0                0     3019        716
## 10184           1          0          0                0     9276       2422
## 9778            0          0          1                0       70         70
## 20712           1          0          0                0  1907736      31488
## 13640           1          0          0                0      184         94
## 59389           1          0          0                0     2872        717
## 5523            1          0          0                0      210        110
## 29064           1          0          0                0   190013      14784
## 27924           1          0          0                0     4737       2854
## 14337           1          0          0                0     7731       2016
## 48881           1          0          0                0      180         94
## 50532           0          1          0                0       66         66
## 55425           1          0          0                0      194        102
## 11647           1          0          0                0       70         70
## 2860            1          0          0                0     7476       2192
## 28080           0          0          1                0       70         70
## 29785           1          0          0                0     1642        822
## 39675           1          0          0                0      606        300
## 12618           1          0          0                0      740         93
## 11206           1          0          0                0      288         93
## 50716           0          0          1                0       70         70
## 24100           1          0          0                0   151233      12592
## 7677            0          1          0                0       62         62
## 17044           1          0          0                0      206        105
## 41904           0          1          0                0       62         62
## 60500           1          0          0                0       70         70
## 23255           0          0          1                0       66         66
## 15255           0          0          1                0       70         70
## 38270           0          0          1                0       70         70
## 60237           0          1          0                0       66         66
## 40274           0          0          1                0       66         66
## 34028           1          0          0                0      167         86
## 24954           1          0          0                0     3586       1074
## 24440           0          0          1                0       70         70
## 4543            1          0          0                0      183         94
## 8889            0          1          0                0       66         66
## 26404           0          0          1                0       70         70
## 1332            0          0          1                0       70         70
## 64088           1          0          0                0     4317       1134
## 22392           0          1          0                0       66         66
## 64184           0          0          1                0       70         70
## 3122            1          0          0                0      183         94
## 17259           1          0          0                0      212        102
## 35015           1          0          0                0      318        192
## 34728           0          0          1                0       66         66
## 29325           0          1          0                0       62         62
## 56196           0          0          1                0       70         70
## 9522            1          0          0                0      223         78
## 8004            0          1          0                0       66         66
## 41413           0          0          1                0       70         70
## 19198           1          0          0                0     8407       3736
## 59999           1          0          0                0      168         78
## 47586           0          1          0                0       66         66
## 40981           0          0          1                0       66         66
## 24452           0          0          1                0       70         70
## 6731            0          1          0                0       62         62
## 13483           1          0          0                0      166         86
## 60650           0          1          0                0       66         66
## 30117           1          0          0                0    37440       7952
## 21332           0          0          1                0       70         70
## 37240           0          1          0                0       66         66
## 31307           1          0          0                0     6698       1606
## 19466           1          0          0                0      165         86
## 16111           1          0          0                0    80933      20860
## 41387           0          1          0                0       60         60
## 18679           1          0          0                0     7389       1786
## 50369           0          1          0                0       66         66
## 41349           0          1          0                0      146        146
## 29521           0          0          1                0       70         70
## 13011           0          0          1                0       66         66
## 7192            0          1          0                0       62         62
## 45024           1          0          0                0      199         94
## 65239           1          0          0                0   254281      66535
## 25580           0          0          1                0       70         70
## 37836           1          0          0                0      209        102
## 31840           1          0          0                0     5168        700
## 22142           0          1          0                0      162        162
## 62123           1          0          0                0      211        102
## 12840           1          0          0                0     7685       1016
## 25670           1          0          0                0     6982       1728
## 31825           1          0          0                0      321        108
## 25728           1          0          0                0      214        110
## 65151           0          0          1                0       70         70
## 48911           1          0          0                0      199        102
## 57171           1          0          0                0      211        110
## 4733            0          1          0                0       62         62
## 55275           0          0          1                0       66         66
## 49213           0          1          0                0       66         66
## 10523           0          0          1                0       70         70
## 20474           1          0          0                0      374        240
## 23319           1          0          0                0     9262       2792
## 37131           0          0          1                0       70         70
## 44281           1          0          0                0      166        166
## 34899           0          0          1                0       66         66
## 25149           1          0          0                0      199        102
## 10468           0          1          0                0       60         60
## 968             0          1          0                0       62         62
## 59957           0          0          1                0       66         66
## 26999           1          0          0                0     2898       1485
## 4953            1          0          0                0      198        102
## 16950           0          1          0                0       66         66
## 43154           0          1          0                0      148        148
## 10027           0          0          1                0       66         66
## 45632           1          0          0                0     2148        918
## 38163           0          1          0                0      129        129
## 38768           0          0          1                0       70         70
## 41171           1          0          0                0    83683       2747
## 6703            0          0          1                0       66         66
## 39721           1          0          0                0      199        102
## 61720           0          1          0                0       66         66
## 29920           0          0          1                0       70         70
## 38698           1          0          0                0    10781       3635
## 32774           1          0          0                0     8252       1702
## 6876            0          1          0                0      145        145
## 17938           0          1          0                0       60         60
## 29961           0          1          0                0      146        146
## 37801           1          0          0                0      214        110
## 64428           1          0          0                0      177         94
## 46086           1          0          0                0      170         87
## 23041           1          0          0                0      168         78
## 42350           1          0          0                0      199         94
## 348             1          0          0                0     2901       1407
## 40766           1          0          0                0     6084       1842
## 31159           1          0          0                0     9737       3353
## 18513           0          1          0                0       62         62
## 14618           1          0          0                0      294         94
## 39559           0          1          0                0       62         62
## 5038            1          0          0                0      199        102
## 48438           1          0          0                0      196        102
## 1831            0          1          0                0       83         83
## 35323           0          1          0                0       66         66
## 37857           1          0          0                0     8025       3048
## 7285            0          1          0                0       62         62
## 16144           0          0          1                0       70         70
## 47434           1          0          0                0      195         94
## 9231            0          0          1                0       70         70
## 45212           0          1          0                0       66         66
## 18148           1          0          0                0      747        462
## 43671           0          1          0                0       66         66
## 5536            1          0          0                0      199        102
## 62174           1          0          0                0     6351       1370
## 38306           1          0          0                0     5312       1510
## 40841           1          0          0                0      294         86
## 60676           0          0          1                0       66         66
## 10502           0          0          1                0       66         66
## 37436           1          0          0                0     7165       1312
## 4795            1          0          0                0      240         76
## 6256            1          0          0                0  5814771      76275
## 37313           1          0          0                0      754         94
## 50684           0          0          1                0       70         70
## 34386           1          0          0                0      708        384
## 52359           1          0          0                0      235         90
## 9890            1          0          0                0      230        102
## 65303           1          0          0                0  5870176      83236
## 20505           1          0          0                0      194        102
## 31820           1          0          0                0       70         70
## 40972           0          0          1                0       70         70
## 37810           1          0          0                0       78         78
## 54407           0          1          0                0       62         62
## 28776           0          0          1                0       70         70
## 30685           1          0          0                0      630        508
## 39729           1          0          0                0      450        246
## 64756           1          0          0                0  7978844      97517
## 2159            1          0          0                0    25125       2681
## 43849           1          0          0                0      168         78
## 43733           1          0          0                0     1624        126
## 55828           1          0          0                0     8290       2156
## 39333           1          0          0                0      209        110
## 42519           0          1          0                0      146        146
## 21231           1          0          0                0     2895       1646
## 4956            1          0          0                0      193        102
## 42738           1          0          0                0      214        140
## 12737           1          0          0                0     4458       1022
## 17356           0          0          1                0       70         70
## 21136           1          0          0                0      275         96
## 56176           1          0          0                0      177         94
## 47194           1          0          0                0      198         93
## 42498           0          0          1                0       70         70
## 47156           1          0          0                0      202        103
## 25443           1          0          0                0      184         94
## 50638           1          0          0                0      486        240
## 36705           1          0          0                0      164         84
## 11590           0          1          0                0       62         62
## 48757           1          0          0                0      572        292
## 10712           1          0          0                0      455        102
## 29445           1          0          0                0     5166        700
## 54553           0          1          0                0       60         60
## 56357           1          0          0                0      751         95
## 29759           1          0          0                0     1899       1310
## 40385           1          0          0                0     8256       3157
## 60274           0          1          0                0       62         62
## 38239           0          0          1                0       66         66
## 44772           0          1          0                0       62         62
## 46155           1          0          0                0     3363        797
## 60110           1          0          0                0      130         70
## 44915           1          0          0                0    29039       3057
## 22420           0          0          1                0       66         66
## 46789           1          0          0                0      199        102
## 62768           0          0          1                0       70         70
## 41684           1          0          0                0      132        132
## 11564           0          1          0                0       60         60
## 38847           1          0          0                0      219         98
## 21965           1          0          0                0      132         70
## 37530           0          1          0                0       66         66
## 16743           1          0          0                0      178         94
## 41515           1          0          0                0     3751       1007
## 543             1          0          0                0      177         94
## 3988            1          0          0                0      184         94
## 57777           1          0          0                0      499        425
## 25542           0          1          0                0       62         62
## 58279           0          1          0                0       62         62
## 35155           1          0          0                0      782        428
## 5340            1          0          0                0    10454       1724
## 27236           1          0          0                0     3244       1630
## 44362           0          1          0                0       62         62
## 32797           1          0          0                0      211        102
## 52902           1          0          0                0    21288      10113
## 42171           0          1          0                0       66         66
## 3124            1          0          0                0      177         94
## 12614           1          0          0                0      168         86
## 65506           1          0          0                0     5776       1880
## 18576           0          1          0                0       66         66
## 53355           1          0          0                0      194         99
## 42124           0          1          0                0       62         62
## 38205           1          0          0                0      186         95
## 31562           0          1          0                0       62         62
## 56730           0          1          0                0       66         66
## 1160            1          0          0                0      184         94
## 46391           0          1          0                0       62         62
## 7486            0          1          0                0       62         62
## 54079           1          0          0                0      366        180
## 5158            0          1          0                0       66         66
## 35908           0          0          1                0       70         70
## 37132           0          0          1                0       70         70
## 2820            0          0          1                0       70         70
## 13283           1          0          0                0      165         86
## 23624           0          1          0                0       62         62
## 56373           1          0          0                0     3354       1792
## 57240           1          0          0                0       70         70
## 59170           0          0          1                0       70         70
## 21379           0          1          0                0       66         66
## 5372            1          0          0                0      176         90
## 33558           1          0          0                0     4427       1520
## 5211            0          0          1                0       66         66
## 48266           1          0          0                0      172         88
## 5001            1          0          0                0   367162      17367
## 53703           1          0          0                0     7149       1719
## 2760            0          1          0                0       62         62
## 42501           0          0          1                0       66         66
## 13955           0          0          1                0       70         70
## 38479           1          0          0                0     4305        699
## 2007            0          1          0                0       62         62
## 826             1          0          0                0      558        210
## 52165           0          0          1                0       70         70
## 55552           1          0          0                0      196        102
## 51633           1          0          0                0      323        150
## 37398           1          0          0                0      178         94
## 45954           0          0          1                0       66         66
## 57835           0          1          0                0       66         66
## 5349            1          0          0                0       70         70
## 22697           1          0          0                0      130         70
## 56202           0          0          1                0       70         70
## 13350           1          0          0                0    35970      11674
## 8258            1          0          0                0      295         90
## 48253           1          0          0                0      204         64
## 20797           0          1          0                0       66         66
## 248             0          0          1                0       70         70
## 26891           0          0          1                0       70         70
## 63302           1          0          0                0      199        102
## 4057            1          0          0                0     2192       1049
## 29021           1          0          0                0 14233438     253696
## 26932           0          1          0                0       60         60
## 22840           0          0          1                0       70         70
## 6230            1          0          0                0      217         90
## 41527           0          1          0                0      146        146
## 19558           1          0          0                0      870        330
## 30532           0          0          1                0       70         70
## 30065           1          0          0                0      318        192
## 44966           1          0          0                0    10325       2594
## 49462           1          0          0                0     4962        887
## 31542           0          0          1                0       70         70
## 20933           1          0          0                0     5574        999
## 12377           0          1          0                0       60         60
## 16459           0          1          0                0       62         62
## 16917           1          0          0                0      198        128
## 48849           1          0          0                0      244         98
## 58835           0          1          0                0       66         66
## 33042           0          0          1                0       70         70
## 40843           1          0          0                0     4398       1491
## 7860            0          1          0                0       62         62
## 5928            1          0          0                0      225        110
## 61501           1          0          0                0     9597       2488
## 63685           1          0          0                0  4626083     113708
## 14451           0          0          1                0       70         70
## 8193            1          0          0                0      366        240
## 63312           1          0          0                0      138         78
## 40372           0          1          0                0       66         66
## 59106           0          0          1                0       70         70
## 57863           0          1          0                0       62         62
## 5425            0          0          1                0       66         66
## 18274           1          0          0                0     7274       1925
## 9576            1          0          0                0       70         70
## 45003           1          0          0                0       70         70
## 61281           0          1          0                0       66         66
## 55213           0          1          0                0       66         66
## 42753           1          0          0                0    30057      13955
## 5847            0          0          1                0       70         70
## 10623           0          0          1                0       70         70
## 7664            0          1          0                0       62         62
## 3982            1          0          0                0      260        102
## 62328           1          0          0                0     9616       6956
## 7794            0          1          0                0       62         62
## 54274           1          0          0                0      177         94
## 11335           1          0          0                0     3642        421
## 22825           0          0          1                0       70         70
##       Bytes.Received Packets Elapsed.Time..sec. pkts_sent pkts_received
## 4519            7140      60                410        27            33
## 13921              0       1                  0         1             0
## 21316              0       1                  0         1             0
## 61397              0       1                  5         1             0
## 59091              0       1                  0         1             0
## 61449           6900      26                362        12            14
## 21342              0       1                  0         1             0
## 10124            430      12                 81         7             5
## 62762              0       6               1199         6             0
## 42177              0       1                  0         1             0
## 21317              0       1                  0         1             0
## 2656               0       1                  0         1             0
## 35422             87       2                 30         1             1
## 4781              89       2                 30         1             1
## 55032             83       2                 30         1             1
## 2793               0       1                  0         1             0
## 63544              0       1                  0         1             0
## 1966             258       9                 21         5             4
## 58168            132       2                 30         1             1
## 37824           1034      14                151         8             6
## 48250           1190      13                 83         7             6
## 41867          12913      23                124         9            14
## 32808              0       2                 40         2             0
## 60722              0       1                  0         1             0
## 12209            101       2                 30         1             1
## 58810           6290      49                115        22            27
## 43979              0       1                  0         1             0
## 36938            202       4                 31         2             2
## 14813              0       1                  0         1             0
## 59979            143       2                 32         1             1
## 55479            106       2                 30         1             1
## 41314            380       2               1200         1             1
## 22837              0       1                  0         1             0
## 56850           4511      25                 16        13            12
## 7337               0       1                  0         1             0
## 23632              0       1                  0         1             0
## 16543           1263      23                 27        13            10
## 61906              0       1                  0         1             0
## 64959              0       1                  0         1             0
## 32094              0       1                  0         1             0
## 38121              0       1                  0         1             0
## 48018            745       2                 31         1             1
## 32945              0       1                  0         1             0
## 50828           9235      31                 82        17            14
## 15931              0       1                  0         1             0
## 4392             952      10                 15         6             4
## 62548           5509      38                 90        19            19
## 28973              0       1                  4         1             0
## 63723              0       8                 44         8             0
## 52780              0       1                  0         1             0
## 47578              0       1                  0         1             0
## 22171           1775      15                 15         8             7
## 50173            209       2                 30         1             1
## 22596              0       1                  4         1             0
## 41014              0       1                  0         1             0
## 37644              0       1                  0         1             0
## 44523              0       1                  0         1             0
## 40851            258       6                 30         3             3
## 2252               0       1                  0         1             0
## 43215              0       1                  0         1             0
## 6227           76349     103                 16        48            55
## 3920             105       2                 29         1             1
## 60301              0       1                  0         1             0
## 3081              90       2                 29         1             1
## 8570             613       2                 30         1             1
## 65470              0       1                  5         1             0
## 28805              0       1                  0         1             0
## 41110              0       1                  0         1             0
## 25385              0       1                  0         1             0
## 44242            206       8                 26         5             3
## 35075              0       1                  0         1             0
## 32480             78       2                 30         1             1
## 54476            613       2                 30         1             1
## 51675              0       1                  0         1             0
## 44625            123       2                 29         1             1
## 45247            930      19                 34        11             8
## 39488              0       1                  0         1             0
## 8487               0       1                  0         1             0
## 5842              91       2                 30         1             1
## 23741            737       2                 30         1             1
## 59676            119       2                 29         1             1
## 58646              0       1                  0         1             0
## 28429              0       1                  0         1             0
## 64070            122       2                 30         1             1
## 17301             95       2                 31         1             1
## 10147           7795      41                150        21            20
## 34908              0       1                  0         1             0
## 13079              0       1                  0         1             0
## 25136             80       2                 30         1             1
## 53449              0       2                  8         2             0
## 17039            223       2                 31         1             1
## 65011              0       1                  0         1             0
## 30757             93       2                 30         1             1
## 32937              0       1                  0         1             0
## 61594            450      12                 78         7             5
## 41164           5273      22                129        13             9
## 37252             91       2                 30         1             1
## 31019              0       1                  0         1             0
## 41787              0       1                  0         1             0
## 30595             90       2                 31         1             1
## 32431            101       2                 30         1             1
## 26499              0       1                  0         1             0
## 30173           6429      27                135        15            12
## 55813           5084      24                 16        13            11
## 62233              0       1                  0         1             0
## 51031              0       1                  0         1             0
## 23204              0       1                  0         1             0
## 23677              0       1                  0         1             0
## 45799           6388      16                 15        10             6
## 54341              0       1                  0         1             0
## 26900              0       1                  0         1             0
## 51885            126       2                 31         1             1
## 1749               0       1                  0         1             0
## 64900            112       2                 30         1             1
## 58048            717       2                 30         1             1
## 57868              0       1                  0         1             0
## 29692            720       2                 31         1             1
## 23808            883       2                 30         1             1
## 8669            4034      92                626        50            42
## 55070              0       2                  8         2             0
## 4420            3663      59                122        33            26
## 4438             209       4                 32         2             2
## 13451             81       2                 30         1             1
## 20527           7319      22                 78        10            12
## 11491            126       7                 25         5             2
## 2340               0       1                  0         1             0
## 42925              0       1                  0         1             0
## 44740              0       1                  0         1             0
## 28485          34626     136                 47        63            73
## 37233            102       2                 31         1             1
## 57552            186       8                 15         5             3
## 4022               0       1                  4         1             0
## 27255              0       1                  0         1             0
## 40267              0       1                  0         1             0
## 22745              0       1                  0         1             0
## 18627            704       2                 30         1             1
## 21567            748       2                 30         1             1
## 14724              0       2                  8         2             0
## 63564              0       1                  0         1             0
## 41864          34735      58                 19        27            31
## 58136             93       2                 30         1             1
## 2761               0       1                  0         1             0
## 63672         382810     411                143       151           260
## 53764              0       1                  0         1             0
## 21811           2476      25                 18        13            12
## 9941             767      11                 18         7             4
## 25445              0       1                 30         1             0
## 9207               0       1                  0         1             0
## 22495              0       1                  0         1             0
## 60133           2169      22                 21        13             9
## 27618            198       2                 30         1             1
## 64536              0       1                  0         1             0
## 59619             69       2                 30         1             1
## 33984              0       1                  0         1             0
## 46358           7361      26                186        14            12
## 36883             90       2                 30         1             1
## 54999              0       1                  0         1             0
## 37217            659       2                 31         1             1
## 29937              0       1                  0         1             0
## 58791              0       1                  5         1             0
## 9019             174       2                 30         1             1
## 27556         260551     574                158       255           319
## 50951            763       2                 30         1             1
## 53670              0       1                  0         1             0
## 1998               0       1                  0         1             0
## 5556            7325      40                 15        24            16
## 34498           2653      13                121         6             7
## 8791               0       1                  0         1             0
## 35198           4085      16                145        10             6
## 49599            119       2                 29         1             1
## 10546            828      11                 61         6             5
## 7832               0       1                  0         1             0
## 5724               0       1                  0         1             0
## 53065        2495646    2678                 84       986          1692
## 55222              0       1                  0         1             0
## 60563              0       1                  0         1             0
## 6649               0       2                  8         2             0
## 16201              0       1                  0         1             0
## 37112              0       1                  0         1             0
## 39159            180       6                  6         3             3
## 50280            124       3                 11         1             2
## 21664              0       1                  0         1             0
## 31355             93       2                 30         1             1
## 33033              0       1                  0         1             0
## 38411          35391      54                 54        26            28
## 36913           3525      30                 23        17            13
## 58817              0       8               3633         8             0
## 9343             258       6                 30         3             3
## 39610             60       2                 15         1             1
## 41479              0       1                  0         1             0
## 36548              0       1                  0         1             0
## 9626               0       1                  0         1             0
## 48451           7548      79                 19        40            39
## 56007             97       2                 30         1             1
## 42634           3473      39                 97        21            18
## 63808          19280      35                 17        16            19
## 25140             98       2                 30         1             1
## 49492            194       4                 19         1             3
## 3807              93       2                 31         1             1
## 13077              0       1                  0         1             0
## 18195             97       2                 29         1             1
## 50565           5479      18                 15         9             9
## 38529             96       2                 30         1             1
## 508                0       1                  5         1             0
## 16306            626       2                 30         1             1
## 10393              0       1                  0         1             0
## 30229            202       4                 47         2             2
## 34807             74       7                 15         6             1
## 27594              0      22               1526        22             0
## 21686              0       1                  0         1             0
## 63632              0       1                  0         1             0
## 31394           6293      27                150        14            13
## 51622            755       2                 30         1             1
## 23887           5955      18                135         8            10
## 61223           7237      23                 52        11            12
## 27278              0       1                  0         1             0
## 44310              0       1                  0         1             0
## 15214              0       1                  0         1             0
## 49693             74       3                  6         2             1
## 9004          927658    1244                141       563           681
## 63308              0       2                  7         2             0
## 20938             88       2                 30         1             1
## 14241            126       6                 26         4             2
## 39693        1519008    1758                185       656          1102
## 56361           8222      23                 18        14             9
## 42726           5411      25                315        12            13
## 6999               0       1                  0         1             0
## 56448              0       1                  0         1             0
## 51193            145       2                 29         1             1
## 10311            142       2                 30         1             1
## 53946              0       1                  0         1             0
## 63100              0       1                  0         1             0
## 32192            126       7                 21         5             2
## 17834           1021      19                 38        11             8
## 11840           1640      15               1200         8             7
## 18396             95       2                 30         1             1
## 36974            180       6                  7         3             3
## 51924            755       2                 30         1             1
## 1154           33203      71                 60        41            30
## 37419            809      18                 43        10             8
## 12061             60       2                  5         1             1
## 47095           1139      17                142        10             7
## 20634             92       2                 31         1             1
## 11071              0       1                  0         1             0
## 34051             60       2                 14         1             1
## 41868           9716      40                 18        23            17
## 20200             98       2                 30         1             1
## 43393              0       1                  0         1             0
## 50015              0       1                  0         1             0
## 19855              0       1                  0         1             0
## 19289              0       1                  0         1             0
## 59568              0       1                  0         1             0
## 20456              0       1                  0         1             0
## 51753              0       1                  0         1             0
## 56755              0       1                  0         1             0
## 1396               0       1                  0         1             0
## 24703           3332      27                 27        15            12
## 47162              0       1                 90         1             0
## 41100              0       1                  0         1             0
## 43756              0       1                  5         1             0
## 47463            174       2                 30         1             1
## 36910            106       2                 30         1             1
## 21523           3952      22                 17        13             9
## 51106              0       1                  0         1             0
## 36874             97       2                 30         1             1
## 24069             82       2                 30         1             1
## 48118              0       1                  0         1             0
## 35405            795      19                 69        10             9
## 4221               0       1                  0         1             0
## 52109              0       1                  0         1             0
## 15172              0       1                  0         1             0
## 13853             85       2                 30         1             1
## 45875          37300      77                 54        36            41
## 13064              0       1                  0         1             0
## 13594              0       1                  0         1             0
## 18344             89       2                 30         1             1
## 38000            379       2               1199         1             1
## 55635       22597986   24754                320      8113         16641
## 4484              62       2                 30         1             1
## 3323              90       2                 30         1             1
## 17453              0       1                  0         1             0
## 56203              0       1                  0         1             0
## 45239              0       1                  5         1             0
## 58768            160       2                 30         1             1
## 33224              0       1                  5         1             0
## 48820              0       1                  0         1             0
## 49940              0       2                  7         2             0
## 49139            966      13                 61         7             6
## 28207              0       1                  0         1             0
## 23732           5817      29                391        10            19
## 20414             92       2                 31         1             1
## 2857            6020      20                 15        11             9
## 34688          12034      25                 31        11            14
## 47876            140       6                 27         4             2
## 44706              0       1                  0         1             0
## 7466               0       1                  0         1             0
## 299              180       6                  6         3             3
## 13880            176       2               1200         1             1
## 41753              0       1                  0         1             0
## 8882               0       1                  0         1             0
## 19941             97       2                 29         1             1
## 32088              0       1                  0         1             0
## 6832               0       1                  0         1             0
## 42415              0       1                  0         1             0
## 65526            238       2                 30         1             1
## 2895             104       2                 31         1             1
## 58580             89       2                 30         1             1
## 25475            100       2                 31         1             1
## 65009              0       1                  0         1             0
## 10315             95       2                 30         1             1
## 53370           2051      32                361        18            14
## 35060            144       2                 31         1             1
## 60614              0       1                  0         1             0
## 50106              0       1                  0         1             0
## 8385               0       1                  0         1             0
## 10103              0       1                  0         1             0
## 65059              0       1                  0         1             0
## 42426              0       1                  0         1             0
## 26704            662      16                 94         9             7
## 13729            355      10                255         6             4
## 5790           99851      94                 15        24            70
## 26589              0       1                  0         1             0
## 16578              0       3                  8         3             0
## 4490              91       2                 31         1             1
## 21604            415      11                342         6             5
## 18982              0       1                  0         1             0
## 13694              0       1                  0         1             0
## 50725              0       1                  0         1             0
## 39406              0       1                  0         1             0
## 30599             84       2                 31         1             1
## 43405              0       1                  0         1             0
## 39561              0       1                  0         1             0
## 9926               0       1                 30         1             0
## 9780               0       1                  0         1             0
## 52308           6252      32                109        17            15
## 9877             288       2                 30         1             1
## 26063           5519      23                 87        12            11
## 53399           5167      17                 27         9             8
## 9951            5621      31                 20        17            14
## 56439              0       1                  0         1             0
## 62318          58120     112                 44        39            73
## 50879             74       4                 30         3             1
## 36016            168       4                 35         2             2
## 22278              0       1                  4         1             0
## 48370         114348     200                 84       101            99
## 57086           5589      24                201        13            11
## 22735              0       1                  0         1             0
## 6738               0       1                  0         1             0
## 20708            239       4                 52         2             2
## 46397              0       1                  0         1             0
## 22130              0       1                  0         1             0
## 40081            149       2                 30         1             1
## 6501            2472      10                120         5             5
## 18037              0       1                  0         1             0
## 18950             87       2                 31         1             1
## 43834             82       2                 29         1             1
## 60049           3992      42                186        22            20
## 60588              0       1                  0         1             0
## 62148             74       3                 11         2             1
## 3520             254       2                 30         1             1
## 21810            454      15                 39        10             5
## 44472              0       4                 12         4             0
## 37859         438835     488                 66       185           303
## 29802            189       4                 31         2             2
## 33632              0       1                  5         1             0
## 29501              0       1                  5         1             0
## 46373             93       2                 30         1             1
## 28722              0       1                  0         1             0
## 56963             82       2                 29         1             1
## 35515            114       2                 30         1             1
## 7642               0       1                  0         1             0
## 9193               0       1                  0         1             0
## 50382              0       1                  0         1             0
## 27418          29974      45                123        18            27
## 52448              0       1                  0         1             0
## 23072             93       2                 30         1             1
## 30147              0       1                  0         1             0
## 44119           1614       7                120         3             4
## 64169             74       4                  0         3             1
## 12619            668       2                 31         1             1
## 28265           1614       7                120         3             4
## 6073               0       1                  0         1             0
## 65152              0       1                  0         1             0
## 52381              0       1                  0         1             0
## 18375             93       2                 31         1             1
## 24541           4467      16                 15         7             9
## 34697              0       1                  0         1             0
## 54059           5186      24                 76        13            11
## 59260         568843     497                 38       114           383
## 27153            180       6                  6         3             3
## 61217            270       6                 31         3             3
## 17470              0       1                  0         1             0
## 41021              0       1                  0         1             0
## 62674           1311      18                 17         9             9
## 47772             81       2                 29         1             1
## 6539             146       3                 33         2             1
## 39436              0       1                  0         1             0
## 49383              0       1                  0         1             0
## 30540              0       1                  0         1             0
## 62053             92       2                 30         1             1
## 17416              0       1                  0         1             0
## 7008               0       1                  0         1             0
## 57263              0       1                  0         1             0
## 37999           4956      16                127         7             9
## 43125            126       6                 26         4             2
## 10021              0       1                  0         1             0
## 5759             126       7                 26         5             2
## 3543            7429      26                 93        11            15
## 42108              0       1                  0         1             0
## 10333            719      17                 36         9             8
## 22250              0       1                  0         1             0
## 63362              0       2                  8         2             0
## 42107              0       1                  0         1             0
## 37885             99       2                 29         1             1
## 48353             91       2                 30         1             1
## 56872           4453      23                 16        13            10
## 45439            774       2                 31         1             1
## 9708               0       1                  0         1             0
## 52345           5057      38                 20        21            17
## 17467              0       1                  0         1             0
## 53809              0       1                  5         1             0
## 49239              0       1                  0         1             0
## 43093              0       2                  8         2             0
## 32259              0       1                  0         1             0
## 12191            267       6                 30         3             3
## 15647            134       2                 33         1             1
## 4951              97       2                 30         1             1
## 23968              0       1                  0         1             0
## 19777             92       2                 31         1             1
## 59511              0       1                  0         1             0
## 2995               0       3                 13         3             0
## 5866            6046      25                361        13            12
## 28426              0       1                  0         1             0
## 3616               0       1                  0         1             0
## 13391          22675      34                 41        14            20
## 39370             89       2                 30         1             1
## 55447             84       2                 29         1             1
## 24869              0       1                  0         1             0
## 44665            188       2                 30         1             1
## 63753            726       2                 30         1             1
## 64667              0       1                  0         1             0
## 16094            145       2                 33         1             1
## 42002              0       2                  8         2             0
## 33910              0       1                  0         1             0
## 17145             89       2                 30         1             1
## 46779             66       6                 15         5             1
## 10282           1714      17                 32         9             8
## 67             12798      29                 15        14            15
## 35695              0       1                  0         1             0
## 37981              0       1               1199         1             0
## 13702              0       1                  0         1             0
## 1194              83       2                 30         1             1
## 61358              0       1                  0         1             0
## 35140             83       2                 30         1             1
## 10447              0       1                  0         1             0
## 50583            754       4                 32         3             1
## 17473              0       1                  0         1             0
## 55426             84       2                 29         1             1
## 21111           2019      19                 18        10             9
## 55223              0       1                  0         1             0
## 58596           7942      38                120        20            18
## 36957             91       2                 31         1             1
## 51457              0       1                  0         1             0
## 2957               0       1                  0         1             0
## 3828               0       1                  5         1             0
## 23251              0       1                  0         1             0
## 62058           2907      19                 27        11             8
## 12622            659       2                 31         1             1
## 45778             94       2                 30         1             1
## 44347              0       1                  0         1             0
## 40350              0       1                  0         1             0
## 4716               0       1                  0         1             0
## 18161             82       2                 31         1             1
## 40307              0       1                  0         1             0
## 29537              0       1                  0         1             0
## 26918              0       1                  0         1             0
## 15757              0       1                  0         1             0
## 63252              0       7               3616         7             0
## 19603              0       1                  0         1             0
## 41439              0       1                  0         1             0
## 12858         633265     779                118       335           444
## 38480            379       4                 54         2             2
## 65135          45628      65                 46        29            36
## 9925              83       2                 30         1             1
## 8855               0       1                  0         1             0
## 12669              0       1                  0         1             0
## 44887              0       1                  0         1             0
## 58489           4615       9                 15         4             5
## 2916               0       2                  8         2             0
## 58527              0       2                  7         2             0
## 32516           5989      20                 16        11             9
## 44165             83       2                 30         1             1
## 28845              0       1                  0         1             0
## 24255            424      12                 16         7             5
## 49754              0       1                  0         1             0
## 10975              0       1                  0         1             0
## 43106            109       2                 30         1             1
## 126                0       1                  5         1             0
## 30358            100       2                 31         1             1
## 39835              0       1                  0         1             0
## 11092              0       1                  0         1             0
## 58970            183       2                 30         1             1
## 38068              0       1                  0         1             0
## 56544            106       2                 30         1             1
## 49764              0       1                  0         1             0
## 54740           2043      26                418        14            12
## 63352             60       2                 15         1             1
## 7364               0       1                  0         1             0
## 44939            503      11                 25         6             5
## 10246            229       4                 47         2             2
## 5061             122       6                 15         4             2
## 59974            139       2                 30         1             1
## 44699              0       1                  0         1             0
## 31081             84       2                 30         1             1
## 48377             90       2                 29         1             1
## 51888              0       1                  0         1             0
## 62738           7438      35                 92        17            18
## 62006          16626      32                 28        13            19
## 21966          21734      27                 85        10            17
## 42931              0       1                  0         1             0
## 25772             90       2                 30         1             1
## 49250              0       1                  4         1             0
## 29343              0       1                  0         1             0
## 46830             97       2                 30         1             1
## 64366            118       2                 30         1             1
## 32378            102       2                 30         1             1
## 30177           5142      25                 25        14            11
## 63199              0       1                  0         1             0
## 4908             176       4                 47         2             2
## 50498              0       6               3609         6             0
## 8854               0       1                  0         1             0
## 63193              0       1                  0         1             0
## 64109        4827641    5384                290      1964          3420
## 33814              0       1                  0         1             0
## 56606            919      10                 15         5             5
## 32064              0       1                  0         1             0
## 53274              0       1                  0         1             0
## 58640              0       1                  0         1             0
## 13415            126       6                 43         4             2
## 61401              0       1                  5         1             0
## 40211              0       1                  0         1             0
## 33513            142       2                 30         1             1
## 22778              0       1                  0         1             0
## 3392             856      13                 36         7             6
## 20397           1319       4                 41         2             2
## 57570            436       4                 36         2             2
## 32793              0       4                 12         4             0
## 58987           4394      18                 17        10             8
## 37694            701      10                 15         5             5
## 17478              0       1                  0         1             0
## 37416           7564      47                 68        25            22
## 46727           1567      14                 16         7             7
## 59235              0       1                  0         1             0
## 46854             82       2                 30         1             1
## 16105           6945      51                161        29            22
## 47560              0       1                  0         1             0
## 64789            126       6                 26         4             2
## 15460            282       2                 30         1             1
## 56809              0       1                  0         1             0
## 16910              0       1                  0         1             0
## 53389             91       2                 31         1             1
## 43035            424      10                 25         6             4
## 60235              0       1                  0         1             0
## 41093           4790      22                 53        12            10
## 56126            440      10                 15         5             5
## 62730           2319      13                135         6             7
## 45910              0       2                  8         2             0
## 41809              0       1                  0         1             0
## 1267            8999      28                 62        16            12
## 19261              0       1                  0         1             0
## 2440             228       2                 29         1             1
## 648                0       1                  0         1             0
## 53215              0       1                  0         1             0
## 8936            1487      14                 15         7             7
## 15448           7153      48                 27        26            22
## 15150              0       1                  0         1             0
## 37710            121       2                 30         1             1
## 25677             91       2                 30         1             1
## 43700           3607      17                 75        10             7
## 12795           3445      16                 26         9             7
## 59550              0       1                  0         1             0
## 25684            437       4                 46         2             2
## 41534              0       1                  0         1             0
## 59342             74       4                 26         3             1
## 47800             99       2                 29         1             1
## 45862           5509      24                224        14            10
## 38812             80       2                 29         1             1
## 34105              0       2                  8         2             0
## 40698           6127      21                 16        11            10
## 47109             96       2                 30         1             1
## 6070               0       1                  0         1             0
## 4936              99       2                 30         1             1
## 48585             89       2                 31         1             1
## 16349           4466      16                 15         7             9
## 34766           2907      19                 26        11             8
## 30601           5003      19                136         9            10
## 25224            104       2                 30         1             1
## 40039              0       1                  0         1             0
## 21238              0       1                  0         1             0
## 38457             97       2                 30         1             1
## 31129             82       3                 44         2             1
## 45840             83       2                 29         1             1
## 39498              0       1                  0         1             0
## 937             5348      38                 50        18            20
## 41842              0       1                  0         1             0
## 10481              0       1                  0         1             0
## 9401               0       1                  0         1             0
## 38591              0       1                  0         1             0
## 32514             89       2                 31         1             1
## 36438             83       2                 30         1             1
## 46934            418       8                 81         4             4
## 654                0       1                  0         1             0
## 42409              0       1                  0         1             0
## 45238           5486      28                373        14            14
## 53974              0       1                  0         1             0
## 20276              0       1                  0         1             0
## 28562           1262      17                 79        10             7
## 60239              0       1                  0         1             0
## 25939              0       1                  0         1             0
## 53410            258       6                 30         3             3
## 4500              89       2                 31         1             1
## 15016              0       1                  0         1             0
## 44394              0       1                  0         1             0
## 22466              0       1                  0         1             0
## 45696              0       1                  0         1             0
## 37977            248       2                 31         1             1
## 4418            2149      35                 94        18            17
## 14324            392       8                 31         4             4
## 26789            290       2                 30         1             1
## 58895            180       6                  6         3             3
## 19359           4468      16                 16         7             9
## 55053           2815      46                 82        22            24
## 23932              0       1                  0         1             0
## 18862             94       2                 29         1             1
## 33255            276       2                 30         1             1
## 4635               0       1                  0         1             0
## 57940              0       4                 12         4             0
## 40995              0       1                  0         1             0
## 17949              0       1                  0         1             0
## 51586              0       4                 12         4             0
## 50158            117       2                 30         1             1
## 42441              0       1                  0         1             0
## 23657              0       1                  0         1             0
## 47732            647       2                 31         1             1
## 30165             83       2                 30         1             1
## 39466              0       1                  0         1             0
## 129                0       1                  5         1             0
## 9309           40501      74                 47        36            38
## 19029              0       1                  0         1             0
## 62043            110       2                 30         1             1
## 19374            104       2                 30         1             1
## 48164              0       1                  0         1             0
## 38602              0       1                  0         1             0
## 45567              0       1                  0         1             0
## 25998              0       2                  8         2             0
## 8097            2105      20                 19        12             8
## 56250              0       1                  0         1             0
## 21983              0       1                 30         1             0
## 47031              0       1                  0         1             0
## 22223              0       1                  0         1             0
## 12560              0       1                  5         1             0
## 3821               0       1                  5         1             0
## 46484              0       1                  0         1             0
## 16385           3779      17                 30         9             8
## 24859              0       1                  0         1             0
## 35478             83       2                 30         1             1
## 24834              0       1                  0         1             0
## 44816           2907      19                 29        11             8
## 50037              0       1                  0         1             0
## 57070            842      14                224         7             7
## 31616              0       1                  0         1             0
## 10860              0       1                  0         1             0
## 1625              85       2                 30         1             1
## 14913          19365      49                 28        24            25
## 12625           3274      17                135         9             8
## 63031          17621      79                141        40            39
## 35057             90       2                 30         1             1
## 6705               0       1                  0         1             0
## 62480           5575      18                 57         9             9
## 59860              0       1                  0         1             0
## 42139              0       1                  0         1             0
## 43081            517      11                 15         6             5
## 59150              0       1                  0         1             0
## 36476            518       4               1200         2             2
## 43734           1508       4                 31         2             2
## 6615            2303      12                 23         7             5
## 10184           6854      25                186        14            11
## 9778               0       1                  0         1             0
## 20712        1876248    1747                 21       470          1277
## 13640             90       2                 30         1             1
## 59389           2155      14                135         6             8
## 5523             100       2                 30         1             1
## 29064         175229     238                206       101           137
## 27924           1883      25                117        13            12
## 14337           5715      38                271        20            18
## 48881             86       2                 30         1             1
## 50532              0       1                  0         1             0
## 55425             92       2                 29         1             1
## 11647              0       2                  8         2             0
## 2860            5284      27                316        13            14
## 28080              0       1                  0         1             0
## 29785            820       5                 30         1             4
## 39675            306      11                 60         6             5
## 12618            647       2                 31         1             1
## 11206            195       4                 32         2             2
## 50716              0       1                  0         1             0
## 24100         138641     190                 19        88           102
## 7677               0       1                  0         1             0
## 17044            101       2                 31         1             1
## 41904              0       1                  0         1             0
## 60500              0       1                  5         1             0
## 23255              0       1                  0         1             0
## 15255              0       1                  0         1             0
## 38270              0       1                  0         1             0
## 60237              0       1                  0         1             0
## 40274              0       1                  0         1             0
## 34028             81       2                 30         1             1
## 24954           2512      17                137         8             9
## 24440              0       1                  0         1             0
## 4543              89       2                 30         1             1
## 8889               0       1                  0         1             0
## 26404              0       1                  0         1             0
## 1332               0       1                  0         1             0
## 64088           3183      16                 26         9             7
## 22392              0       1                  0         1             0
## 64184              0       1                  0         1             0
## 3122              89       2                 30         1             1
## 17259            110       2                 30         1             1
## 35015            126       6                 26         4             2
## 34728              0       1                  0         1             0
## 29325              0       1                  0         1             0
## 56196              0       1                  0         1             0
## 9522             145       2                 30         1             1
## 8004               0       1                  0         1             0
## 41413              0       1                  0         1             0
## 19198           4671      19                101        11             8
## 59999             90       2                 30         1             1
## 47586              0       1                  0         1             0
## 40981              0       1                  0         1             0
## 24452              0       1                  0         1             0
## 6731               0       1                  0         1             0
## 13483             80       2                 31         1             1
## 60650              0       1                  0         1             0
## 30117          29488      67                128        28            39
## 21332              0       1                  0         1             0
## 37240              0       1                  0         1             0
## 31307           5092      23                 19        13            10
## 19466             79       2                 30         1             1
## 16111          60073     121                324        66            55
## 41387              0       1                  0         1             0
## 18679           5603      26                211        14            12
## 50369              0       1                  0         1             0
## 41349              0       1                  0         1             0
## 29521              0       1                  0         1             0
## 13011              0       1                  0         1             0
## 7192               0       1                  0         1             0
## 45024            105       2                 30         1             1
## 65239         187746     336                317       183           153
## 25580              0       1                  0         1             0
## 37836            107       2                 30         1             1
## 31840           4468      16                 15         7             9
## 22142              0       1                  0         1             0
## 62123            109       2                 31         1             1
## 12840           6669      29                413        11            18
## 25670           5254      23                 27        13            10
## 31825            213       4                 33         2             2
## 25728            104       2                 30         1             1
## 65151              0       1                  0         1             0
## 48911             97       2                 30         1             1
## 57171            101       2                 30         1             1
## 4733               0       1                  0         1             0
## 55275              0       1                  0         1             0
## 49213              0       1                  0         1             0
## 10523              0       1                  0         1             0
## 20474            134       3                 32         2             1
## 23319           6470      53                616        28            25
## 37131              0       1                  0         1             0
## 44281              0       1               1200         1             0
## 34899              0       1                  0         1             0
## 25149             97       2                 30         1             1
## 10468              0       1                  0         1             0
## 968                0       1                  0         1             0
## 59957              0       1                  0         1             0
## 26999           1413      10                 15         6             4
## 4953              96       2                 30         1             1
## 16950              0       1                  0         1             0
## 43154              0       1                  0         1             0
## 10027              0       1                  0         1             0
## 45632           1230      10                 20         6             4
## 38163              0       1                  0         1             0
## 38768              0       1                  0         1             0
## 41171          80936      92                125        32            60
## 6703               0       1                  0         1             0
## 39721             97       2                 30         1             1
## 61720              0       1                  0         1             0
## 29920              0       1                  0         1             0
## 38698           7146      27                221        15            12
## 32774           6550      26                 72        13            13
## 6876               0       1                  0         1             0
## 17938              0       1                  0         1             0
## 29961              0       1                  0         1             0
## 37801            104       2                 30         1             1
## 64428             83       2                 30         1             1
## 46086             83       2                 30         1             1
## 23041             90       2                 29         1             1
## 42350            105       2                 30         1             1
## 348             1494      15                 38         9             6
## 40766           4242      26                329        14            12
## 31159           6384      27                 32        13            14
## 18513              0       1                  0         1             0
## 14618            200       4                 51         2             2
## 39559              0       1                  0         1             0
## 5038              97       2                 30         1             1
## 48438             94       2                 30         1             1
## 1831               0       1                  0         1             0
## 35323              0       1                  0         1             0
## 37857           4977      23                 45        13            10
## 7285               0       1                  0         1             0
## 16144              0       1                  0         1             0
## 47434            101       2                 30         1             1
## 9231               0       1                  0         1             0
## 45212              0       1                  0         1             0
## 18148            285       2                 31         1             1
## 43671              0       1                  0         1             0
## 5536              97       2                 29         1             1
## 62174           4981      18                131         9             9
## 38306           3802      18                 45        11             7
## 40841            208       2                 30         1             1
## 60676              0       1                  0         1             0
## 10502              0       1                  0         1             0
## 37436           5853      22                 16        12            10
## 4795             164       2                 30         1             1
## 6256         5738496    4959                154       976          3983
## 37313            660       2                 30         1             1
## 50684              0       1                  0         1             0
## 34386            324      12                195         7             5
## 52359            145       2                 33         1             1
## 9890             128       2                 30         1             1
## 65303        5786940    5053                 78      1161          3892
## 20505             92       2                 30         1             1
## 31820              0       1                  4         1             0
## 40972              0       1                  0         1             0
## 37810              0       1                 30         1             0
## 54407              0       1                  0         1             0
## 28776              0       1                  0         1             0
## 30685            122       9                 15         7             2
## 39729            204       8                141         5             3
## 64756        7881327    6752                120      1393          5359
## 2159           22444      36                318        19            17
## 43849             90       2                 30         1             1
## 43733           1498       4                 31         2             2
## 55828           6134      24                 81        14            10
## 39333             99       2                 30         1             1
## 42519              0       1                  0         1             0
## 21231           1249      17                 16         8             9
## 4956              91       2                 30         1             1
## 42738             74       4                 35         3             1
## 12737           3436      16                 26         9             7
## 17356              0       1                  0         1             0
## 21136            179       4                 41         2             2
## 56176             83       2                 31         1             1
## 47194            105       2                 31         1             1
## 42498              0       1                  0         1             0
## 47156             99       2                 30         1             1
## 25443             90       2                 30         1             1
## 50638            246       9                 35         5             4
## 36705             80       2                 31         1             1
## 11590              0       1                  0         1             0
## 48757            280       9                 32         5             4
## 10712            353       4                 33         2             2
## 29445           4466      16                 15         7             9
## 54553              0       1                  0         1             0
## 56357            656       2                 30         1             1
## 29759            589      12                 48         6             6
## 40385           5099      28                 46        16            12
## 60274              0       1                  0         1             0
## 38239              0       1                  0         1             0
## 44772              0       1                  0         1             0
## 46155           2566      14                135         7             7
## 60110             60       2                 15         1             1
## 44915          25982      46                323        19            27
## 22420              0       1                  0         1             0
## 46789             97       2                 29         1             1
## 62768              0       1                  0         1             0
## 41684              0       3               1200         3             0
## 11564              0       1                  0         1             0
## 38847            121       2                 30         1             1
## 21965             62       2                 31         1             1
## 37530              0       1                  0         1             0
## 16743             84       2                 30         1             1
## 41515           2744      35                425        11            24
## 543               83       2                 30         1             1
## 3988              90       2                 30         1             1
## 57777             74       4                  0         3             1
## 25542              0       1                  0         1             0
## 58279              0       1                  0         1             0
## 35155            354      12                 28         7             5
## 5340            8730      28                525        12            16
## 27236           1614       8                120         4             4
## 44362              0       1                  0         1             0
## 32797            109       2                 30         1             1
## 52902          11175     107                 31        51            56
## 42171              0       1                  0         1             0
## 3124              83       2                 30         1             1
## 12614             82       2                 31         1             1
## 65506           3896      19                272        11             8
## 18576              0       1                  0         1             0
## 53355             95       2                 30         1             1
## 42124              0       1                  0         1             0
## 38205             91       2                 31         1             1
## 31562              0       1                  0         1             0
## 56730              0       1                  0         1             0
## 1160              90       2                 30         1             1
## 46391              0       1                  0         1             0
## 7486               0       1                  0         1             0
## 54079            186       7                 93         4             3
## 5158               0       1                  0         1             0
## 35908              0       1                  0         1             0
## 37132              0       1                  0         1             0
## 2820               0       1                  0         1             0
## 13283             79       2                 29         1             1
## 23624              0       1                  0         1             0
## 56373           1562      46                194        23            23
## 57240              0       1                  5         1             0
## 59170              0       1                  0         1             0
## 21379              0       1                  0         1             0
## 5372              86       2                 30         1             1
## 33558           2907      19                 30        11             8
## 5211               0       1                  0         1             0
## 48266             84       2                 31         1             1
## 5001          349795     470                143       214           256
## 53703           5430      18                326         9             9
## 2760               0       1                  0         1             0
## 42501              0       1                  0         1             0
## 13955              0       1                  0         1             0
## 38479           3606      13                 46         7             6
## 2007               0       1                  0         1             0
## 826              348       9                 28         4             5
## 52165              0       1                  0         1             0
## 55552             94       2                 30         1             1
## 51633            173       2               1203         1             1
## 37398             84       2                 31         1             1
## 45954              0       1                  0         1             0
## 57835              0       1                  0         1             0
## 5349               0       1                  5         1             0
## 22697             60       2                 15         1             1
## 56202              0       1                  0         1             0
## 13350          24296     108                265        55            53
## 8258             205       2                 29         1             1
## 48253            140       4                 11         2             2
## 20797              0       1                  0         1             0
## 248                0       1                  0         1             0
## 26891              0       1                  0         1             0
## 63302             97       2                 29         1             1
## 4057            1143      24                119        11            13
## 29021       13979742   12752                 54      3515          9237
## 26932              0       1                  0         1             0
## 22840              0       1                  0         1             0
## 6230             127       2                 30         1             1
## 41527              0       1                  0         1             0
## 19558            540      12               1316         6             6
## 30532              0       1                  0         1             0
## 30065            126       6                 30         4             2
## 44966           7731      28                 27        16            12
## 49462           4075      15                 28         9             6
## 31542              0       1                  0         1             0
## 20933           4575      21                256        11            10
## 12377              0       1                  0         1             0
## 16459              0       1                  0         1             0
## 16917             70       3                  0         2             1
## 48849            146       2                 31         1             1
## 58835              0       1                  0         1             0
## 33042              0       1                  0         1             0
## 40843           2907      19                 30        11             8
## 7860               0       1                  0         1             0
## 5928             115       2                 30         1             1
## 61501           7109      32                287        16            16
## 63685        4512375    4382                213      1198          3184
## 14451              0       1                  0         1             0
## 8193             126       7                 21         5             2
## 63312             60       2                  4         1             1
## 40372              0       1                  0         1             0
## 59106              0       1                  0         1             0
## 57863              0       1                  0         1             0
## 5425               0       1                  0         1             0
## 18274           5349      24                138        12            12
## 9576               0       2                  8         2             0
## 45003              0       1                  4         1             0
## 61281              0       1                  0         1             0
## 55213              0       1                  0         1             0
## 42753          16102     146                556        65            81
## 5847               0       1                  0         1             0
## 10623              0       1                  0         1             0
## 7664               0       1                  0         1             0
## 3982             158       2                 30         1             1
## 62328           2660      39                 47        21            18
## 7794               0       1                  0         1             0
## 54274             83       2                 31         1             1
## 11335           3221      12                 16         6             6
## 22825              0       1                  0         1             0
##Persistent Homology of IntFirewallData_one_hot_1000_df dataset

# calculate persistent homology for IntFirewallData_one_hot_1000_df Dataset
# NOTE(review): calculate_homology()/plot_barcode()/plot_persist() look like
# TDAstats functions, but no library(TDAstats) call is visible in the attached
# packages — confirm the package is loaded earlier in the file.
phom_IntFirewallData_one_hot_1000_df <- calculate_homology(IntFirewallData_one_hot_1000_df)


# plot barcode for IntFirewallData_one_hot_1000_df Dataset
plot_barcode(phom_IntFirewallData_one_hot_1000_df)

# plot persistent diagram of IntFirewallData_one_hot_1000_df Dataset
plot_persist(phom_IntFirewallData_one_hot_1000_df)

#####———————————————MAPPER ALGORITHM————————————————

#Prepare Adult dataset for Mapper 1D algorithm
# Column 15 of `adult` is kept as the outcome column (adult_df1) and bound
# onto the one-hot-encoded frame. df2/df3 select small column subsets;
# df4 drops the last two columns (109, 110) of the combined frame.
# NOTE(review): the hard-coded column indices (1, 11, 28, 62–66, 109) are
# assumed to match the one-hot encoding produced earlier — confirm against
# the encoding step if the encoder or column order changes.
adult_df1<-adult[,15]
adult.one_hot_df1<-cbind(adult.one_hot_df,adult_df1)
adult.one_hot_df2<-adult.one_hot_df1[,c(1,11,28,64,65,66,109)]
adult.one_hot_df3<-adult.one_hot_df1[,c(1,11,28,62,63,64,65,66)]
adult.one_hot_df4<-adult.one_hot_df1[,-c(109,110)] 

##Two Filter Functions PCA & KDE

#Prepare linear PCA as a filter function by centering and scaling dataset first on all one hot df dataset
b<- prcomp(adult.one_hot_df, center=TRUE, scale=TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))

#Conduct kernel density estimator as a filter function on 4 of 6
# NOTE(review): kde() appears to come from the `ks` package, which is not
# attached in the visible library() calls — confirm it is loaded earlier.
# H = diag(1, nrow = 4) fixes an identity bandwidth matrix for the 4 columns.
filter.kde <- kde(adult.one_hot_df3[,1:4],H=diag(1,nrow = 4),eval.points =adult.one_hot_df3[,1:4])$estimate


###*** Adult Mapper 5 intervals, 50% overlap, 5 bins

# 1-D Mapper over the Euclidean distance matrix, filtered by the first
# principal component of the one-hot Adult data.
m_adult_5.50.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = c(ts_pca_b$PC1),
     num_intervals = 5,
     percent_overlap = 50,
     num_bins_when_clustering = 5)


# graph.adjacency() and layout.auto() were deprecated in igraph 2.0.0 (the
# knitted output used to emit deprecation warnings here); use the current
# graph_from_adjacency_matrix() and layout_nicely() instead.
g_adult_5.50.5 <- graph_from_adjacency_matrix(m_adult_5.50.5$adjacency, mode = "undirected")
plot(g_adult_5.50.5, layout = layout_nicely(g_adult_5.50.5))

head(str(m_adult_5.50.5$level_of_vertex))
##  int [1:5] 1 2 3 4 5
## NULL
head(str(m_adult_5.50.5$vertices_in_level))
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
## NULL
head(str(m_adult_5.50.5$points_in_vertex))
## List of 5
##  $ : int [1:4917] 8 10 12 21 26 46 64 69 73 87 ...
##  $ : int [1:12206] 2 8 10 11 12 15 21 24 26 28 ...
##  $ : int [1:13240] 1 2 4 5 6 9 11 15 16 19 ...
##  $ : int [1:16700] 1 3 4 5 6 9 13 14 16 17 ...
##  $ : int [1:14404] 3 7 13 14 17 18 22 25 27 32 ...
## NULL

# Colour each vertex by the filter interval (level) it came from.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
my_max <- max(m_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.50.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[as.numeric(cut(
                       my_vector, breaks = my_resolution))]

# lengths() is the idiomatic replacement for unlist(lapply(x, length));
# the graph was already built above, so it is not rebuilt here.
vertex_size <- lengths(m_adult_5.50.5$points_in_vertex)

# Vertex size is scaled by log of the number of points in the vertex.
plot(g_adult_5.50.5, layout = layout_nicely(g_adult_5.50.5),
     vertex.size = 30*log(vertex_size)/
     max(log(vertex_size)),
     vertex.color = my_colors)

##Extract the ID observations of each mapper output vertex
m_adult_5.50.5.n1<-m_adult_5.50.5$points_in_vertex[1]
    m_adult_5.50.5.n1.vec<-as.vector(unlist(m_adult_5.50.5.n1))
m_adult_5.50.5.n2<-m_adult_5.50.5$points_in_vertex[2]
    m_adult_5.50.5.n2.vec<-as.vector(unlist(m_adult_5.50.5.n2))
m_adult_5.50.5.n3<-m_adult_5.50.5$points_in_vertex[3]
    m_adult_5.50.5.n3.vec<-as.vector(unlist(m_adult_5.50.5.n3))
m_adult_5.50.5.n4<-m_adult_5.50.5$points_in_vertex[4]
    m_adult_5.50.5.n4.vec<-as.vector(unlist(m_adult_5.50.5.n4))
m_adult_5.50.5.n5<-m_adult_5.50.5$points_in_vertex[5]
    m_adult_5.50.5.n5.vec<-as.vector(unlist(m_adult_5.50.5.n5))

##map the ID's of each Mapper vertex point to the actual Adult One Hot DF4 dataset
tda.m_adult_5.50.5.n1.vec<-adult.one_hot_df4[m_adult_5.50.5.n1.vec,]
tda.m_adult_5.50.5.n2.vec<-adult.one_hot_df4[m_adult_5.50.5.n2.vec,]
tda.m_adult_5.50.5.n3.vec<-adult.one_hot_df4[m_adult_5.50.5.n3.vec,]
tda.m_adult_5.50.5.n4.vec<-adult.one_hot_df4[m_adult_5.50.5.n4.vec,]
tda.m_adult_5.50.5.n5.vec<-adult.one_hot_df4[m_adult_5.50.5.n5.vec,]

##*** Adult Mapper 5 intervals, 40% overlap, 5 bins

# Same PCA-filtered Mapper as above, with 40% interval overlap.
m_adult_5.40.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = c(ts_pca_b$PC1),
     num_intervals = 5,
     percent_overlap = 40,
     num_bins_when_clustering = 5)


# graph.adjacency()/layout.auto() are deprecated since igraph 2.0.0;
# use graph_from_adjacency_matrix()/layout_nicely().
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency, mode = "undirected")
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5))

head(str(m_adult_5.40.5$level_of_vertex))
##  int [1:5] 1 2 3 4 5
## NULL
head(str(m_adult_5.40.5$vertices_in_level))
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
## NULL
head(str(m_adult_5.40.5$points_in_vertex))
## List of 5
##  $ : int [1:3373] 8 10 12 21 26 46 64 69 73 95 ...
##  $ : int [1:10276] 2 8 10 11 12 15 21 24 26 28 ...
##  $ : int [1:11563] 1 2 4 6 9 16 19 20 23 24 ...
##  $ : int [1:14818] 1 3 4 5 6 9 13 14 16 17 ...
##  $ : int [1:12081] 7 13 14 18 22 25 27 32 36 37 ...
## NULL

# Colour each vertex by the filter interval (level) it came from.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
my_max <- max(m_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.40.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[as.numeric(cut(
                       my_vector, breaks = my_resolution))]

vertex_size <- lengths(m_adult_5.40.5$points_in_vertex)

# BUG FIX: this previously plotted g_adult_5.50.5 (the 50%-overlap graph)
# with a 40%-overlap layout — a copy-paste slip. Plot the 40% graph itself.
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5),
     vertex.size = 30*log(vertex_size)/
     max(log(vertex_size)),
     vertex.color = my_colors)

##Extract the ID observations of each mapper output vertex
m_adult_5.40.5.n1<-m_adult_5.40.5$points_in_vertex[1]
    m_adult_5.40.5.n1.vec<-as.vector(unlist(m_adult_5.40.5.n1))
m_adult_5.40.5.n2<-m_adult_5.40.5$points_in_vertex[2]
    m_adult_5.40.5.n2.vec<-as.vector(unlist(m_adult_5.40.5.n2))
m_adult_5.40.5.n3<-m_adult_5.40.5$points_in_vertex[3]
    m_adult_5.40.5.n3.vec<-as.vector(unlist(m_adult_5.40.5.n3))
m_adult_5.40.5.n4<-m_adult_5.40.5$points_in_vertex[4]
    m_adult_5.40.5.n4.vec<-as.vector(unlist(m_adult_5.40.5.n4))
m_adult_5.40.5.n5<-m_adult_5.40.5$points_in_vertex[5]
    m_adult_5.40.5.n5.vec<-as.vector(unlist(m_adult_5.40.5.n5))

##map the ID's of each Mapper vertex point to the actual Adult One Hot DF4 dataset
tda.m_adult_5.40.5.n1.vec<-adult.one_hot_df4[m_adult_5.40.5.n1.vec,]
tda.m_adult_5.40.5.n2.vec<-adult.one_hot_df4[m_adult_5.40.5.n2.vec,]
tda.m_adult_5.40.5.n3.vec<-adult.one_hot_df4[m_adult_5.40.5.n3.vec,]
tda.m_adult_5.40.5.n4.vec<-adult.one_hot_df4[m_adult_5.40.5.n4.vec,]
tda.m_adult_5.40.5.n5.vec<-adult.one_hot_df4[m_adult_5.40.5.n5.vec,]


##*** Adult Mapper 5 intervals, 30% overlap, 5 bins

# Same PCA-filtered Mapper, with 30% interval overlap.
m_adult_5.30.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = c(ts_pca_b$PC1),
     num_intervals = 5,
     percent_overlap = 30,
     num_bins_when_clustering = 5)


# BUG FIX: the graph was previously built from m_adult_5.40.5$adjacency
# (a copy-paste slip), so the plotted topology was the 40% run's. Build it
# from the 30%-overlap mapper result. graph.adjacency()/layout.auto() are
# deprecated since igraph 2.0.0.
g_adult_5.30.5 <- graph_from_adjacency_matrix(m_adult_5.30.5$adjacency, mode = "undirected")
plot(g_adult_5.30.5, layout = layout_nicely(g_adult_5.30.5))

head(str(m_adult_5.30.5$level_of_vertex))
##  int [1:6] 1 1 2 3 4 5
## NULL
head(str(m_adult_5.30.5$vertices_in_level))
## List of 5
##  $ : num [1:2] 1 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
##  $ : num 6
## NULL
head(str(m_adult_5.30.5$points_in_vertex))
## List of 6
##  $ : int [1:2254] 8 21 26 64 69 73 97 101 102 112 ...
##  $ : int 27366
##  $ : int [1:8644] 2 8 10 11 12 15 21 26 28 39 ...
##  $ : int [1:10534] 2 4 6 9 16 19 20 23 24 28 ...
##  $ : int [1:13627] 1 3 4 5 6 14 16 17 18 25 ...
##  $ : int [1:9944] 7 13 22 25 32 36 38 44 52 62 ...
## NULL

# Colour each vertex by the filter interval (level) it came from.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
my_max <- max(m_adult_5.30.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.30.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[as.numeric(cut(
                       my_vector, breaks = my_resolution))]

vertex_size <- lengths(m_adult_5.30.5$points_in_vertex)

# BUG FIX: this previously plotted g_adult_5.50.5 with a 30%-overlap layout;
# plot the 30%-overlap graph itself.
plot(g_adult_5.30.5, layout = layout_nicely(g_adult_5.30.5),
     vertex.size = 30*log(vertex_size)/
     max(log(vertex_size)),
     vertex.color = my_colors)

##Extract the ID observations of each mapper output vertex
m_adult_5.30.5.n1<-m_adult_5.30.5$points_in_vertex[1]
    m_adult_5.30.5.n1.vec<-as.vector(unlist(m_adult_5.30.5.n1))
m_adult_5.30.5.n2<-m_adult_5.30.5$points_in_vertex[2]
    m_adult_5.30.5.n2.vec<-as.vector(unlist(m_adult_5.30.5.n2))
m_adult_5.30.5.n3<-m_adult_5.30.5$points_in_vertex[3]
    m_adult_5.30.5.n3.vec<-as.vector(unlist(m_adult_5.30.5.n3))
m_adult_5.30.5.n4<-m_adult_5.30.5$points_in_vertex[4]
    m_adult_5.30.5.n4.vec<-as.vector(unlist(m_adult_5.30.5.n4))
m_adult_5.30.5.n5<-m_adult_5.30.5$points_in_vertex[5]
    m_adult_5.30.5.n5.vec<-as.vector(unlist(m_adult_5.30.5.n5))
# This run produced 6 vertices (see "List of 6" in points_in_vertex above:
# interval 1 split into two clusters), so also extract vertex 6, which the
# original code silently dropped.
m_adult_5.30.5.n6<-m_adult_5.30.5$points_in_vertex[6]
    m_adult_5.30.5.n6.vec<-as.vector(unlist(m_adult_5.30.5.n6))

##map the ID's of each Mapper vertex point to the actual Adult One Hot DF4 dataset
tda.m_adult_5.30.5.n1.vec<-adult.one_hot_df4[m_adult_5.30.5.n1.vec,]
tda.m_adult_5.30.5.n2.vec<-adult.one_hot_df4[m_adult_5.30.5.n2.vec,]
tda.m_adult_5.30.5.n3.vec<-adult.one_hot_df4[m_adult_5.30.5.n3.vec,]
tda.m_adult_5.30.5.n4.vec<-adult.one_hot_df4[m_adult_5.30.5.n4.vec,]
tda.m_adult_5.30.5.n5.vec<-adult.one_hot_df4[m_adult_5.30.5.n5.vec,]
tda.m_adult_5.30.5.n6.vec<-adult.one_hot_df4[m_adult_5.30.5.n6.vec,]


##*** Adult Mapper KDE Filter 5 intervals, 50% overlap, 5 bins

# Mapper with the kernel-density-estimate filter instead of PC1.
m_kde_adult_5.50.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = c(filter.kde),
     num_intervals = 5,
     percent_overlap = 50,
     num_bins_when_clustering = 5)

# graph.adjacency()/layout.auto() are deprecated since igraph 2.0.0;
# use graph_from_adjacency_matrix()/layout_nicely().
g_kde_adult_5.50.5 <- graph_from_adjacency_matrix(m_kde_adult_5.50.5$adjacency, mode = "undirected")
plot(g_kde_adult_5.50.5, layout = layout_nicely(g_kde_adult_5.50.5))

head(str(m_kde_adult_5.50.5$level_of_vertex))
##  int [1:5] 1 2 3 4 5
## NULL
head(str(m_kde_adult_5.50.5$vertices_in_level))
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
## NULL
head(str(m_kde_adult_5.50.5$points_in_vertex))
## List of 5
##  $ : int [1:13387] 2 4 5 6 7 9 16 19 20 21 ...
##  $ : int [1:12638] 1 2 6 8 9 13 20 24 25 26 ...
##  $ : int [1:11634] 1 8 10 11 12 13 14 27 28 30 ...
##  $ : int [1:10038] 3 10 11 12 14 15 27 30 32 34 ...
##  $ : int [1:7540] 3 15 17 18 37 39 56 59 60 65 ...
## NULL

# Colour each vertex by the filter interval (level) it came from.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
my_max <- max(m_kde_adult_5.50.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.50.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[as.numeric(cut(
                       my_vector, breaks = my_resolution))]

# lengths() replaces unlist(lapply(x, length)); the graph built above is reused.
vertex_size <- lengths(m_kde_adult_5.50.5$points_in_vertex)

plot(g_kde_adult_5.50.5, layout = layout_nicely(g_kde_adult_5.50.5),
     vertex.size = 30*log(vertex_size)/
     max(log(vertex_size)),
     vertex.color = my_colors)

##Extract the ID observations of each mapper output vertex
m_kde_adult_5.50.5.n1<-m_kde_adult_5.50.5$points_in_vertex[1]
    m_kde_adult_5.50.5.n1.vec<-as.vector(unlist(m_kde_adult_5.50.5.n1))
m_kde_adult_5.50.5.n2<-m_kde_adult_5.50.5$points_in_vertex[2]
    m_kde_adult_5.50.5.n2.vec<-as.vector(unlist(m_kde_adult_5.50.5.n2))
m_kde_adult_5.50.5.n3<-m_kde_adult_5.50.5$points_in_vertex[3]
    m_kde_adult_5.50.5.n3.vec<-as.vector(unlist(m_kde_adult_5.50.5.n3))
m_kde_adult_5.50.5.n4<-m_kde_adult_5.50.5$points_in_vertex[4]
    m_kde_adult_5.50.5.n4.vec<-as.vector(unlist(m_kde_adult_5.50.5.n4))
m_kde_adult_5.50.5.n5<-m_kde_adult_5.50.5$points_in_vertex[5]
    m_kde_adult_5.50.5.n5.vec<-as.vector(unlist(m_kde_adult_5.50.5.n5))

##map the ID's of each Mapper vertex point to the actual Adult One Hot DF4 dataset
tda.m_kde_adult_5.50.5.n1.vec<-adult.one_hot_df4[m_kde_adult_5.50.5.n1.vec,]
tda.m_kde_adult_5.50.5.n2.vec<-adult.one_hot_df4[m_kde_adult_5.50.5.n2.vec,]
tda.m_kde_adult_5.50.5.n3.vec<-adult.one_hot_df4[m_kde_adult_5.50.5.n3.vec,]
tda.m_kde_adult_5.50.5.n4.vec<-adult.one_hot_df4[m_kde_adult_5.50.5.n4.vec,]
tda.m_kde_adult_5.50.5.n5.vec<-adult.one_hot_df4[m_kde_adult_5.50.5.n5.vec,]




##*** Adult Mapper KDE 5 intervals, 40% overlap, 5 bins

# KDE-filtered Mapper with 40% interval overlap.
m_kde_adult_5.40.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = c(filter.kde),
     num_intervals = 5,
     percent_overlap = 40,
     num_bins_when_clustering = 5)


# graph.adjacency()/layout.auto() are deprecated since igraph 2.0.0;
# use graph_from_adjacency_matrix()/layout_nicely().
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency, mode = "undirected")
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5))

head(str(m_kde_adult_5.40.5$level_of_vertex))
##  int [1:5] 1 2 3 4 5
## NULL
head(str(m_kde_adult_5.40.5$vertices_in_level))
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
## NULL
head(str(m_kde_adult_5.40.5$points_in_vertex))
## List of 5
##  $ : int [1:11838] 4 5 6 7 9 16 19 20 21 22 ...
##  $ : int [1:11203] 1 2 6 9 13 20 24 25 26 29 ...
##  $ : int [1:10351] 1 8 10 11 12 14 27 28 30 31 ...
##  $ : int [1:8741] 3 10 11 12 14 15 27 30 32 34 ...
##  $ : int [1:6628] 3 15 17 18 37 39 59 60 65 66 ...
## NULL

# Colour each vertex by the filter interval (level) it came from.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
my_max <- max(m_kde_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.40.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[as.numeric(cut(
                       my_vector, breaks = my_resolution))]

# lengths() replaces unlist(lapply(x, length)); the graph built above is reused.
vertex_size <- lengths(m_kde_adult_5.40.5$points_in_vertex)

plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5),
     vertex.size = 30*log(vertex_size)/
     max(log(vertex_size)),
     vertex.color = my_colors)

##Extract the ID observations of each mapper output vertex
m_kde_adult_5.40.5.n1<-m_kde_adult_5.40.5$points_in_vertex[1]
    m_kde_adult_5.40.5.n1.vec<-as.vector(unlist(m_kde_adult_5.40.5.n1))
m_kde_adult_5.40.5.n2<-m_kde_adult_5.40.5$points_in_vertex[2]
    m_kde_adult_5.40.5.n2.vec<-as.vector(unlist(m_kde_adult_5.40.5.n2))
m_kde_adult_5.40.5.n3<-m_kde_adult_5.40.5$points_in_vertex[3]
    m_kde_adult_5.40.5.n3.vec<-as.vector(unlist(m_kde_adult_5.40.5.n3))
m_kde_adult_5.40.5.n4<-m_kde_adult_5.40.5$points_in_vertex[4]
    m_kde_adult_5.40.5.n4.vec<-as.vector(unlist(m_kde_adult_5.40.5.n4))
m_kde_adult_5.40.5.n5<-m_kde_adult_5.40.5$points_in_vertex[5]
    m_kde_adult_5.40.5.n5.vec<-as.vector(unlist(m_kde_adult_5.40.5.n5))

##map the ID's of each Mapper vertex point to the actual Adult One Hot DF4 dataset
tda.m_kde_adult_5.40.5.n1.vec<-adult.one_hot_df4[m_kde_adult_5.40.5.n1.vec,]
tda.m_kde_adult_5.40.5.n2.vec<-adult.one_hot_df4[m_kde_adult_5.40.5.n2.vec,]
tda.m_kde_adult_5.40.5.n3.vec<-adult.one_hot_df4[m_kde_adult_5.40.5.n3.vec,]
tda.m_kde_adult_5.40.5.n4.vec<-adult.one_hot_df4[m_kde_adult_5.40.5.n4.vec,]
tda.m_kde_adult_5.40.5.n5.vec<-adult.one_hot_df4[m_kde_adult_5.40.5.n5.vec,]



##*** Adult Mapper KDE 5 intervals, 30% overlap, 5 bins

# BUG FIX: this section is the 30%-overlap run, but percent_overlap was
# hard-coded to 50, so it just repeated the 50% run (the recorded outputs
# below are identical to the 50% section's, confirming the slip).
m_kde_adult_5.30.5 <- mapper1D(
     distance_matrix = dist(adult.one_hot_df),
     filter_values = c(filter.kde),
     num_intervals = 5,
     percent_overlap = 30,
     num_bins_when_clustering = 5)


# graph.adjacency()/layout.auto() are deprecated since igraph 2.0.0;
# use graph_from_adjacency_matrix()/layout_nicely().
g_kde_adult_5.30.5 <- graph_from_adjacency_matrix(m_kde_adult_5.30.5$adjacency, mode = "undirected")
plot(g_kde_adult_5.30.5, layout = layout_nicely(g_kde_adult_5.30.5))

head(str(m_kde_adult_5.30.5$level_of_vertex))
##  int [1:5] 1 2 3 4 5
## NULL
head(str(m_kde_adult_5.30.5$vertices_in_level))
## List of 5
##  $ : num 1
##  $ : num 2
##  $ : num 3
##  $ : num 4
##  $ : num 5
## NULL
head(str(m_kde_adult_5.30.5$points_in_vertex))
## (output below is from the original, accidentally-50%-overlap run)
## List of 5
##  $ : int [1:13387] 2 4 5 6 7 9 16 19 20 21 ...
##  $ : int [1:12638] 1 2 6 8 9 13 20 24 25 26 ...
##  $ : int [1:11634] 1 8 10 11 12 13 14 27 28 30 ...
##  $ : int [1:10038] 3 10 11 12 14 15 27 30 32 34 ...
##  $ : int [1:7540] 3 15 17 18 37 39 56 59 60 65 ...
## NULL

# Colour each vertex by the filter interval (level) it came from.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
my_max <- max(m_kde_adult_5.30.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.30.5$level_of_vertex / my_max

my_colors <- my_palette(my_resolution)[as.numeric(cut(
                       my_vector, breaks = my_resolution))]

# lengths() replaces unlist(lapply(x, length)); the graph built above is reused.
vertex_size <- lengths(m_kde_adult_5.30.5$points_in_vertex)

plot(g_kde_adult_5.30.5, layout = layout_nicely(g_kde_adult_5.30.5),
     vertex.size = 30*log(vertex_size)/
     max(log(vertex_size)),
     vertex.color = my_colors)

##Extract the ID observations of each mapper output vertex
m_kde_adult_5.30.5.n1<-m_kde_adult_5.30.5$points_in_vertex[1]
    m_kde_adult_5.30.5.n1.vec<-as.vector(unlist(m_kde_adult_5.30.5.n1))
m_kde_adult_5.30.5.n2<-m_kde_adult_5.30.5$points_in_vertex[2]
    m_kde_adult_5.30.5.n2.vec<-as.vector(unlist(m_kde_adult_5.30.5.n2))
m_kde_adult_5.30.5.n3<-m_kde_adult_5.30.5$points_in_vertex[3]
    m_kde_adult_5.30.5.n3.vec<-as.vector(unlist(m_kde_adult_5.30.5.n3))
m_kde_adult_5.30.5.n4<-m_kde_adult_5.30.5$points_in_vertex[4]
    m_kde_adult_5.30.5.n4.vec<-as.vector(unlist(m_kde_adult_5.30.5.n4))
m_kde_adult_5.30.5.n5<-m_kde_adult_5.30.5$points_in_vertex[5]
    m_kde_adult_5.30.5.n5.vec<-as.vector(unlist(m_kde_adult_5.30.5.n5))

##map the ID's of each Mapper vertex point to the actual Adult One Hot DF4 dataset
tda.m_kde_adult_5.30.5.n1.vec<-adult.one_hot_df4[m_kde_adult_5.30.5.n1.vec,]
tda.m_kde_adult_5.30.5.n2.vec<-adult.one_hot_df4[m_kde_adult_5.30.5.n2.vec,]
tda.m_kde_adult_5.30.5.n3.vec<-adult.one_hot_df4[m_kde_adult_5.30.5.n3.vec,]
tda.m_kde_adult_5.30.5.n4.vec<-adult.one_hot_df4[m_kde_adult_5.30.5.n4.vec,]
tda.m_kde_adult_5.30.5.n5.vec<-adult.one_hot_df4[m_kde_adult_5.30.5.n5.vec,]
library(caret)

# 70/30 stratified train/test split on the outcome column.
# NOTE(review): no set.seed() before createDataPartition(), so this split is
# not reproducible across runs — consider seeding if results must match the
# recorded outputs.
trainIndex <- createDataPartition(adult.one_hot_df4$adult_df1, p = .7, 
                                  list = FALSE, 
                                  times = 1)

head(trainIndex)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         5
## [6,]         8
adult.one_hot_df4Train <- adult.one_hot_df4[ trainIndex,]
adult.one_hot_df4Test  <- adult.one_hot_df4[-trainIndex,]
#Train Control: k-Fold Cross-validation basis for all models 
fitControl <- trainControl(## 3-fold CV (number = 3; earlier comment said 10-fold)
                           method = "cv",
                           number = 3)
#Non-TDA-Assisted
#Random Forest
# BUG FIX: randomForest()'s arguments are lower-case `importance` and `ntree`.
# The original call passed `Importance=T, n.tree=100`, which caret forwards
# via `...` and randomForest silently ignores — so the model actually grew
# the default 500 trees (the recorded err.rate length of 1500 = 500 trees
# x 3 columns confirms this) and variable importance was the default kind.
adultRfFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train,
                    importance = TRUE, ntree = 100,
                    method = 'rf',
                    trControl = fitControl,
                    metric = 'Accuracy')

# Print the caret resampling summary for the baseline (non-TDA) forest.
adultRfFit
## Random Forest 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15196, 15195 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.8070022  0.2956849
##    55   0.8587285  0.5899910
##   108   0.8546922  0.5798266
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
adultRfFit$resample
##    Accuracy     Kappa Resample
## 1 0.8577257 0.5823456    Fold1
## 2 0.8602264 0.5962355    Fold3
## 3 0.8582335 0.5913918    Fold2
# Keep the per-fold Accuracy column for later comparison across models.
ad_rf_fit_re<-adultRfFit$resample[1]


# summary() on a caret rf fit just lists the final randomForest object's
# components; err.rate length 1500 here reflects the 500-tree default run.
summary(adultRfFit)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       22793  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           45586  matrix     numeric  
## oob.times       22793  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               22793  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
# Top variable importances for the final forest (caret::varImp).
varImp (adultRfFit)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V3                    100.000
## V6.Married.civ.spouse  88.358
## V11                    76.409
## V1                     76.084
## V5                     65.152
## V13                    44.554
## V8.Husband             31.880
## V12                    23.159
## V7.Exec.managerial     10.006
## V7.Prof.specialty       7.717
## V2.Private              7.037
## V8.Wife                 6.986
## V7.Craft.repair         5.798
## V2.Self.emp.not.inc     5.772
## V7.Sales                5.754
## V6.Never.married        5.735
## V2.Self.emp.inc         4.338
## V2.Local.gov            4.023
## V7.Adm.clerical         3.933
## V7.Transport.moving     3.863
# Predict outcome using model from training data based on testing data
predictions <- predict(adultRfFit, newdata = adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# The `reference` argument is named explicitly (it was positional before)
# so the call cannot silently bind the truth labels to the wrong parameter.
rf_cf <- confusionMatrix(data = predictions,
                         reference = as.factor(adult.one_hot_df4Test$adult_df1))
rf_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6920   905
##      >50K     496  1447
##                                           
##                Accuracy : 0.8566          
##                  95% CI : (0.8495, 0.8635)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5829          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9331          
##             Specificity : 0.6152          
##          Pos Pred Value : 0.8843          
##          Neg Pred Value : 0.7447          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7084          
##    Detection Prevalence : 0.8011          
##       Balanced Accuracy : 0.7742          
##                                           
##        'Positive' Class :  <=50K          
## 
rf_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.565725e-01   5.829494e-01   8.494641e-01   8.634675e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.511642e-125   1.147941e-27
# Overall test-set accuracy (element 1 of the overall-statistics vector).
rf_cf_ov_acc <- rf_cf$overall[1]
rf_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9331176            0.6152211            0.8843450 
##       Neg Pred Value            Precision               Recall 
##            0.7447247            0.8843450            0.9331176 
##                   F1           Prevalence       Detection Rate 
##            0.9080769            0.7592138            0.7084357 
## Detection Prevalence    Balanced Accuracy 
##            0.8010852            0.7741693
# Precision, Recall and F1 are elements 5:7 of the by-class statistics.
rf_cf_pre_rec_f1 <- rf_cf$byClass[5:7]

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

# BUG FIX: same argument-name slip as the baseline fit — `Importance=T,
# n.tree=100` are not randomForest() arguments (they are `importance` and
# `ntree`) and were silently ignored via `...`, so the original run grew the
# default 500 trees (err.rate length 1500 in the recorded summary).
Adult_TDA_PC_5.50.5_n1_RfFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n1.vec,
                                       importance = TRUE, ntree = 100,
                                       method = 'rf',
                                       trControl = fitControl,
                                       metric = 'Accuracy')

Adult_TDA_PC_5.50.5_n1_RfFit0
## Random Forest 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3278, 3277, 3279 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.9733579  0.0000000
##    55   0.9725445  0.1097594
##   108   0.9719342  0.1154372
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
Adult_TDA_PC_5.50.5_n1_RfFit0$resample
##    Accuracy Kappa Resample
## 1 0.9737485     0    Fold3
## 2 0.9731707     0    Fold2
## 3 0.9731544     0    Fold1
# Keep only the per-fold Accuracy column (drop Kappa/Resample) for the
# fold-wise difference tests below.
ad_tda_pc_5.50.5_n1_rf_fit0_re<-Adult_TDA_PC_5.50.5_n1_RfFit0$resample[1]


summary(Adult_TDA_PC_5.50.5_n1_RfFit0)
##                 Length Class      Mode     
## call               6   -none-     call     
## type               1   -none-     character
## predicted       4917   factor     numeric  
## err.rate        1500   -none-     numeric  
## confusion          6   -none-     numeric  
## votes           9834   matrix     numeric  
## oob.times       4917   -none-     numeric  
## classes            2   -none-     character
## importance       108   -none-     numeric  
## importanceSD       0   -none-     NULL     
## localImportance    0   -none-     NULL     
## proximity          0   -none-     NULL     
## ntree              1   -none-     numeric  
## mtry               1   -none-     numeric  
## forest            14   -none-     list     
## y               4917   factor     numeric  
## test               0   -none-     NULL     
## inbag              0   -none-     NULL     
## xNames           108   -none-     character
## problemType        1   -none-     character
## tuneValue          1   data.frame list     
## obsLevels          2   -none-     character
## param              2   -none-     list
varImp (Adult_TDA_PC_5.50.5_n1_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                     Overall
## V13                 100.000
## V2.Self.emp.not.inc  94.113
## V5                   68.061
## V1                   66.653
## V2.Private           65.355
## V4.Doctorate         35.644
## V11                  31.734
## V2.Self.emp.inc      26.326
## V3                   24.206
## V7.Farming.fishing   22.429
## V7.Prof.specialty    16.876
## V12                  15.995
## V4.Prof.school       11.432
## V4.Some.college      10.799
## V4.Masters            9.912
## V4.HS.grad            9.565
## V14.South             4.917
## V7.Craft.repair       4.520
## V7.Sales              4.446
## V4.Bachelors          3.956
# Predict outcome using Adult_TDA_PC_5.50.5_n1_RfFit0 from training data based on testing data
# NOTE: pred0 is reused (overwritten) for every node's model in this document.
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n1_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# (removed a duplicate echo of ad_tda_pc_5.50.5_n1_rf_cf0 — the identical
# confusion matrix is already printed immediately above)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n1_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# TDA Node-1 model: overall test-set Accuracy (element 1 of $overall).
ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_rf_cf0$overall[1]
ad_tda_pc_5.50.5_n1_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.0000000            1.0000000                  NaN 
##       Neg Pred Value            Precision               Recall 
##            0.2407862                   NA            0.0000000 
##                   F1           Prevalence       Detection Rate 
##                   NA            0.7592138            0.0000000 
## Detection Prevalence    Balanced Accuracy 
##            0.0000000            0.5000000
# Precision/Recall/F1 (elements 5:7 of $byClass). NA here because the Node-1
# model predicted no positives on the test set (see confusion matrix above).
ad_tda_pc_5.50.5_n1_rf_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold CV accuracy differences: baseline RF minus TDA Node-1 RF
# (negative values favour the TDA-assisted model).
diff_tda_pca_5.50.5_rf_n1_3_fold<-(ad_rf_fit_re-ad_tda_pc_5.50.5_n1_rf_fit0_re)
diff_tda_pca_5.50.5_rf_n1_3_fold
##     Accuracy
## 1 -0.1160228
## 2 -0.1129444
## 3 -0.1149208
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Bayesian sign test on the fold-wise differences with ROPE [-0.01, 0.01]
# (region of practical equivalence of +/- 1 accuracy point).
bst_tda_pca_5.50.5_rf.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Odds in favour of "left" (TDA model better). probRight is 0 here, so the
# division yields Inf by design (infinite odds toward left).
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n1_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n1_3_fold$probRight
bst_tda_pca_5.50.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Bayesian signed-rank test, same differences and ROPE as the sign test above.
bsr_tda_pca_5.50.5_rf.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0.991
## 
## $winRope
## [1] 0.009
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated Bayesian t-test; 0.1 is the assumed fold-overlap correlation
# (rho) for cross-validated results, ROPE again [-0.01, 0.01].
bct_tda_pca_5.50.5_rf.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n1_3_fold
## $left
## [1] 0.9999506
## 
## $rope
## [1] 1.457494e-05
## 
## $right
## [1] 3.480343e-05
# Rope Plot
# Visualise the posterior of the differences against the ROPE (bayestestR).
plot(rope(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold

#t_test
# Frequentist one-sample t-test for reference; only 3 folds, so df = 2.
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n1_3_fold)
## t = -127.29, df = 2, p-value = 6.171e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1185040 -0.1107546
## sample estimates:
##  mean of x 
## -0.1146293
### Test set diff
# Single scalar difference in held-out test accuracy (baseline minus TDA n1).
diff_tda_pca_5.50.5_rf.n1_test<-(rf_cf_ov_acc-ad_tda_pc_5.50.5_n1_rf_cf0_ov_acc)
diff_tda_pca_5.50.5_rf.n1_test
##  Accuracy 
## 0.6157862
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Bayesian sign test on a single observation — posterior is dominated by the
# prior pseudo-counts (hence the 0 / 0.5 / 0.5 split in the output below).
bst_tda_pca_5.50.5_rf.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# Odds toward "left" on the test-set difference (0 here: probLeft is 0).
bst_tda_pca_5.50.5_rf.n1_test_odds.left<-bst_tda_pca_5.50.5_rf.n1_test$probLeft/bst_tda_pca_5.50.5_rf.n1_test$probRight
bst_tda_pca_5.50.5_rf.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.50.5_rf.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1570333
## 
## $winRight
## [1] 0.8429667
# Bayesian Correlated Test

# Correlated Bayesian t-test is undefined for a single observation (no
# variance estimate) — all three posterior probabilities come back NA.
bct_tda_pca_5.50.5_rf.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n1_test)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n1_test)) #bf_tda_pca_5.50.5_rf.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n1_test))

##Node2

# Random forest (caret) on the TDA mapper Node-2 subset; same CV setup as the
# baseline. Fix: use the real randomForest arguments `importance = TRUE` and
# `ntree = 100` — the original `Importance=T` / `n.tree=100` were silently
# ignored via `...` (recorded err.rate length 1500 = default 500 trees x 3).
Adult_TDA_PC_5.50.5_n2_RfFit0 <- train(as.factor(adult_df1) ~ .,
                                       data = tda.m_adult_5.50.5.n2.vec,
                                       method = "rf",
                                       importance = TRUE,
                                       ntree = 100,
                                       trControl = fitControl,
                                       metric = "Accuracy")
                         
Adult_TDA_PC_5.50.5_n2_RfFit0
## Random Forest 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8136, 8138, 8138 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.7077661  0.4120403
##    55   0.7302137  0.4589437
##   108   0.7257890  0.4497935
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_PC_5.50.5_n2_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7361179 0.4709338    Fold1
## 2 0.7224680 0.4434426    Fold3
## 3 0.7320551 0.4624548    Fold2
# Keep only the per-fold Accuracy column for the fold-wise difference tests.
ad_tda_pc_5.50.5_n2_rf_fit0_re<-Adult_TDA_PC_5.50.5_n2_RfFit0$resample[1]


summary(Adult_TDA_PC_5.50.5_n2_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       12206  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           24412  matrix     numeric  
## oob.times       12206  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               12206  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
varImp (Adult_TDA_PC_5.50.5_n2_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                      Overall
## V3                   100.000
## V1                    68.602
## V5                    49.314
## V11                   43.109
## V13                   41.302
## V12                   20.828
## V8.Husband            17.115
## V7.Exec.managerial     6.686
## V2.Private             6.347
## V2.Self.emp.not.inc    6.289
## V7.Craft.repair        5.899
## V7.Sales               5.370
## V7.Prof.specialty      4.619
## V7.Transport.moving    4.429
## V9.Black               4.236
## V2.Self.emp.inc        4.044
## V2.Local.gov           3.953
## V7.Machine.op.inspct   3.706
## V4.HS.grad             3.624
## V7.Adm.clerical        3.503
# Predict outcome using Adult_TDA_PC_5.50.5_n2_RfFit0 from training data based on testing data
# NOTE: pred0 is reused (overwritten) for every node's model in this document.
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n2_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1956     5
##      >50K    5460  2347
##                                           
##                Accuracy : 0.4405          
##                  95% CI : (0.4306, 0.4504)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.146           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2638          
##             Specificity : 0.9979          
##          Pos Pred Value : 0.9975          
##          Neg Pred Value : 0.3006          
##              Prevalence : 0.7592          
##          Detection Rate : 0.2002          
##    Detection Prevalence : 0.2008          
##       Balanced Accuracy : 0.6308          
##                                           
##        'Positive' Class :  <=50K          
## 
# (removed a duplicate echo of ad_tda_pc_5.50.5_n2_rf_cf0 — the identical
# confusion matrix is already printed immediately above)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1956     5
##      >50K    5460  2347
##                                           
##                Accuracy : 0.4405          
##                  95% CI : (0.4306, 0.4504)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.146           
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2638          
##             Specificity : 0.9979          
##          Pos Pred Value : 0.9975          
##          Neg Pred Value : 0.3006          
##              Prevalence : 0.7592          
##          Detection Rate : 0.2002          
##    Detection Prevalence : 0.2008          
##       Balanced Accuracy : 0.6308          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.4405201      0.1460088      0.4306427      0.4504329      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# TDA Node-2 model: overall test-set Accuracy (element 1 of $overall).
ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_rf_cf0$overall[1]
ad_tda_pc_5.50.5_n2_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2637540            0.9978741            0.9974503 
##       Neg Pred Value            Precision               Recall 
##            0.3006276            0.9974503            0.2637540 
##                   F1           Prevalence       Detection Rate 
##            0.4171910            0.7592138            0.2002457 
## Detection Prevalence    Balanced Accuracy 
##            0.2007576            0.6308141
# Precision/Recall/F1 (elements 5:7 of $byClass) for the Node-2 model.
ad_tda_pc_5.50.5_n2_rf_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold CV accuracy differences: baseline RF minus TDA Node-2 RF
# (positive values favour the baseline model).
diff_tda_pca_5.50.5_rf_n2_3_fold<-(ad_rf_fit_re-ad_tda_pc_5.50.5_n2_rf_fit0_re)
diff_tda_pca_5.50.5_rf_n2_3_fold
##    Accuracy
## 1 0.1216078
## 2 0.1377583
## 3 0.1261784
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Bayesian sign test on the fold-wise differences, ROPE [-0.01, 0.01].
bst_tda_pca_5.50.5_rf.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

# Odds toward "left" (TDA model better); 0 here since probLeft is 0.
bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n2_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n2_3_fold$probRight
bst_tda_pca_5.50.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Bayesian signed-rank test, same differences and ROPE as above.
bsr_tda_pca_5.50.5_rf.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009166667
## 
## $winRight
## [1] 0.9908333
# Bayesian Correlated Test

# Correlated Bayesian t-test (rho = 0.1 for CV-fold correlation), same ROPE.
bct_tda_pca_5.50.5_rf.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n2_3_fold
## $left
## [1] 0.0008007738
## 
## $rope
## [1] 0.0002921139
## 
## $right
## [1] 0.9989071
# Rope Plot
# Visualise the posterior of the differences against the ROPE (bayestestR).
plot(rope(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold

#t_test
# Frequentist one-sample t-test for reference; only 3 folds, so df = 2.
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n2_3_fold)
## t = 26.738, df = 2, p-value = 0.001396
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1078346 0.1491951
## sample estimates:
## mean of x 
## 0.1285149
### Test set diff
# Single scalar difference in held-out test accuracy (baseline minus TDA n2).
diff_tda_pca_5.50.5_rf.n2_test<-(rf_cf_ov_acc-ad_tda_pc_5.50.5_n2_rf_cf0_ov_acc)
diff_tda_pca_5.50.5_rf.n2_test
##  Accuracy 
## 0.4160524
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Bayesian sign test on a single observation — result dominated by the prior.
bst_tda_pca_5.50.5_rf.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# Odds toward "left" on the test-set difference (0: probLeft is 0).
bst_tda_pca_5.50.5_rf.n2_test_odds.left<-bst_tda_pca_5.50.5_rf.n2_test$probLeft/bst_tda_pca_5.50.5_rf.n2_test$probRight
bst_tda_pca_5.50.5_rf.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.50.5_rf.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1559333
## 
## $winRight
## [1] 0.8440667
# Bayesian Correlated Test

# Correlated Bayesian t-test is undefined for a single observation (no
# variance estimate) — returns NA for left/rope/right.
bct_tda_pca_5.50.5_rf.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.50.5_rf.n2_test),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n2_test)) #bf_tda_pca_5.50.5_rf.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n2_test))

##Node3

# Random forest (caret) on the TDA mapper Node-3 subset; same CV setup as the
# baseline. Fix: use the real randomForest arguments `importance = TRUE` and
# `ntree = 100` — the original `Importance=T` / `n.tree=100` were silently
# ignored via `...` (recorded err.rate length 1500 = default 500 trees x 3).
Adult_TDA_PC_5.50.5_n3_RfFit0 <- train(as.factor(adult_df1) ~ .,
                                       data = tda.m_adult_5.50.5.n3.vec,
                                       method = "rf",
                                       importance = TRUE,
                                       ntree = 100,
                                       trControl = fitControl,
                                       metric = "Accuracy")

Adult_TDA_PC_5.50.5_n3_RfFit0
## Random Forest 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8827, 8826 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.7941844  0.1885370
##    55   0.8434294  0.4983357
##   108   0.8395774  0.4903697
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_PC_5.50.5_n3_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8474960 0.5104167    Fold1
## 2 0.8382420 0.4885495    Fold3
## 3 0.8445502 0.4960410    Fold2
# Keep only the per-fold Accuracy column for the fold-wise difference tests.
ad_tda_pc_5.50.5_n3_rf_fit0_re<-Adult_TDA_PC_5.50.5_n3_RfFit0$resample[1]


summary(Adult_TDA_PC_5.50.5_n3_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       13240  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           26480  matrix     numeric  
## oob.times       13240  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               13240  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
varImp (Adult_TDA_PC_5.50.5_n3_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V3                    100.000
## V1                     73.769
## V11                    63.907
## V13                    39.813
## V10.Male               38.125
## V10.Female             34.484
## V5                     20.554
## V12                    11.902
## V2.Private              7.854
## V7.Adm.clerical         6.049
## V8.Wife                 5.965
## V6.Married.civ.spouse   5.431
## V7.Sales                5.416
## V7.Exec.managerial      5.355
## V4.Some.college         5.326
## V7.Prof.specialty       5.079
## V4.HS.grad              4.928
## V7.Craft.repair         4.539
## V9.White                4.434
## V9.Black                4.423
# Predict outcome using Adult_TDA_PC_5.50.5_n3_RfFit0 from training data based on testing data
# NOTE: pred0 is reused (overwritten) for every node's model in this document.
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n3_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n3_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4793  1129
##      >50K    2623  1223
##                                           
##                Accuracy : 0.6159          
##                  95% CI : (0.6062, 0.6255)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1367          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6463          
##             Specificity : 0.5200          
##          Pos Pred Value : 0.8094          
##          Neg Pred Value : 0.3180          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4907          
##    Detection Prevalence : 0.6063          
##       Balanced Accuracy : 0.5831          
##                                           
##        'Positive' Class :  <=50K          
## 
# (removed a duplicate echo of ad_tda_pc_5.50.5_n3_rf_cf0 — the identical
# confusion matrix is already printed immediately above)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4793  1129
##      >50K    2623  1223
##                                           
##                Accuracy : 0.6159          
##                  95% CI : (0.6062, 0.6255)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1367          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6463          
##             Specificity : 0.5200          
##          Pos Pred Value : 0.8094          
##          Neg Pred Value : 0.3180          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4907          
##    Detection Prevalence : 0.6063          
##       Balanced Accuracy : 0.5831          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n3_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   6.158886e-01   1.366520e-01   6.061585e-01   6.255496e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  3.220547e-131
# TDA Node-3 model: overall test-set Accuracy (element 1 of $overall).
ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc<-ad_tda_pc_5.50.5_n3_rf_cf0$overall[1]
ad_tda_pc_5.50.5_n3_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6463053            0.5199830            0.8093549 
##       Neg Pred Value            Precision               Recall 
##            0.3179927            0.8093549            0.6463053 
##                   F1           Prevalence       Detection Rate 
##            0.7186985            0.7592138            0.4906839 
## Detection Prevalence    Balanced Accuracy 
##            0.6062654            0.5831441
# Precision/Recall/F1 (elements 5:7 of $byClass) for the Node-3 model.
ad_tda_pc_5.50.5_n3_rf_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n3_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold CV accuracy differences: baseline RF minus TDA Node-3 RF.
diff_tda_pca_5.50.5_rf_n3_3_fold<-(ad_rf_fit_re-ad_tda_pc_5.50.5_n3_rf_fit0_re)
diff_tda_pca_5.50.5_rf_n3_3_fold
##     Accuracy
## 1 0.01022968
## 2 0.02198442
## 3 0.01368332
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Bayesian sign test on the fold-wise differences, ROPE [-0.01, 0.01].
bst_tda_pca_5.50.5_rf.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

# Odds toward "left" (TDA model better); 0 here since probLeft is 0.
bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n3_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n3_3_fold$probRight
bst_tda_pca_5.50.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Bayesian signed-rank test, same differences and ROPE as above.
bsr_tda_pca_5.50.5_rf.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.09083333
## 
## $winRight
## [1] 0.9091667
# Bayesian Correlated Test

# Correlated Bayesian t-test (rho = 0.1) on the Node-3 fold-wise differences.
bct_tda_pca_5.50.5_rf.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),0.1,-0.01,0.01)
# Bug fix: this previously echoed bct_tda_pca_5.50.5_rf.n2_3_fold (copy-paste
# from the Node-2 section), so the Node-3 result computed above was never
# shown. The ## output below is the stale Node-2 run; re-knit to refresh it.
bct_tda_pca_5.50.5_rf.n3_3_fold
## $left
## [1] 0.0008007738
## 
## $rope
## [1] 0.0002921139
## 
## $right
## [1] 0.9989071
# Rope Plot
# Visualise the posterior of the differences against the ROPE (bayestestR).
plot(rope(as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold))
#bf_tda_pca_5.50.5_rf.n3_3_fold

#t_test
# Frequentist one-sample t-test for reference; only 3 folds, so df = 2.
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n3_3_fold)
## t = 4.386, df = 2, p-value = 0.04825
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.0002908355 0.0303074455
## sample estimates:
##  mean of x 
## 0.01529914
### Test set diff
# Single scalar difference in held-out test accuracy (baseline minus TDA n3).
diff_tda_pca_5.50.5_rf.n3_test<-(rf_cf_ov_acc-ad_tda_pc_5.50.5_n3_rf_cf0_ov_acc)
diff_tda_pca_5.50.5_rf.n3_test
##  Accuracy 
## 0.2406839
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Bayesian sign test on a single observation — result dominated by the prior.
bst_tda_pca_5.50.5_rf.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# Odds toward "left" on the test-set difference (0: probLeft is 0).
bst_tda_pca_5.50.5_rf.n3_test_odds.left<-bst_tda_pca_5.50.5_rf.n3_test$probLeft/bst_tda_pca_5.50.5_rf.n3_test$probRight
bst_tda_pca_5.50.5_rf.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.50.5_rf.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1602333
## 
## $winRight
## [1] 0.8397667
# Bayesian Correlated Test

# Correlated Bayesian t-test is undefined for a single observation (no
# variance estimate) — returns NA for left/rope/right.
bct_tda_pca_5.50.5_rf.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n3_test))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n3_test)) #bf_tda_pca_5.50.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n2_test)

##Node4

# Random forest (caret, CV folds from fitControl) on the TDA mapper
# (PCA filter, 5 intervals / 50% overlap / 5 bins) node-4 vectorized data.
# Fix: the original passed `Importance=T, n.tree=100`; randomForest's actual
# argument names are `importance` and `ntree`, so both were silently swallowed
# by `...` (the fitted object shows err.rate for 500 trees and a NULL
# importanceSD, i.e. neither option took effect).
Adult_TDA_PC_5.50.5_n4_RfFit0 <- train(as.factor(adult_df1) ~ .,
                                       data = tda.m_adult_5.50.5.n4.vec,
                                       importance = TRUE, ntree = 100,
                                       method = "rf",
                                       trControl = fitControl,
                                       metric = "Accuracy")

# Print the fitted caret model: CV accuracy/Kappa across mtry values.
Adult_TDA_PC_5.50.5_n4_RfFit0
## Random Forest 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11133, 11134, 11133 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.9449102  0.0000000
##    55   0.9543712  0.3859860
##   108   0.9541317  0.3900239
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
# Per-fold resample accuracies for the selected model.
Adult_TDA_PC_5.50.5_n4_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.9545536 0.4088788    Fold1
## 2 0.9543740 0.3628334    Fold3
## 3 0.9541861 0.3862458    Fold2
# Keep the fold-wise accuracies (column 1) for the Bayesian comparisons below.
ad_tda_pc_5.50.5_n4_rf_fit0_re<-Adult_TDA_PC_5.50.5_n4_RfFit0$resample[1]


summary(Adult_TDA_PC_5.50.5_n4_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       16700  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           33400  matrix     numeric  
## oob.times       16700  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               16700  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
# Scaled variable importance (top 20 of 108 one-hot predictors).
varImp (Adult_TDA_PC_5.50.5_n4_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V11                   100.000
## V3                     96.754
## V1                     67.935
## V13                    38.125
## V8.Wife                24.948
## V5                     17.823
## V12                    10.747
## V7.Adm.clerical        10.074
## V2.Private              7.639
## V7.Exec.managerial      6.168
## V4.Some.college         6.014
## V7.Prof.specialty       5.380
## V6.Divorced             5.335
## V9.Black                5.319
## V10.Female              5.213
## V10.Male                5.163
## V6.Married.civ.spouse   5.027
## V6.Never.married        4.949
## V8.Not.in.family        4.928
## V7.Sales                4.912
# Predict outcome using Adult_TDA_PC_5.50.5_n4_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n4_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  1708
##      >50K       2   644
##                                           
##                Accuracy : 0.8249          
##                  95% CI : (0.8173, 0.8324)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3636          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9997          
##             Specificity : 0.2738          
##          Pos Pred Value : 0.8128          
##          Neg Pred Value : 0.9969          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7590          
##    Detection Prevalence : 0.9339          
##       Balanced Accuracy : 0.6368          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix (already shown
# immediately above) — harmless, but probably a leftover.
ad_tda_pc_5.50.5_n4_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  1708
##      >50K       2   644
##                                           
##                Accuracy : 0.8249          
##                  95% CI : (0.8173, 0.8324)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3636          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9997          
##             Specificity : 0.2738          
##          Pos Pred Value : 0.8128          
##          Neg Pred Value : 0.9969          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7590          
##    Detection Prevalence : 0.9339          
##       Balanced Accuracy : 0.6368          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.249386e-01   3.635798e-01   8.172547e-01   8.324282e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   4.874619e-56   0.000000e+00
# Keep the overall test accuracy for the test-set comparison below.
ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc<-ad_tda_pc_5.50.5_n4_rf_cf0$overall[1]
ad_tda_pc_5.50.5_n4_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9997303            0.2738095            0.8127604 
##       Neg Pred Value            Precision               Recall 
##            0.9969040            0.8127604            0.9997303 
##                   F1           Prevalence       Detection Rate 
##            0.8966018            0.7592138            0.7590090 
## Detection Prevalence    Balanced Accuracy 
##            0.9338657            0.6367699
# Keep precision/recall/F1 (byClass elements 5:7).
ad_tda_pc_5.50.5_n4_rf_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n4_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

diff_tda_pca_5.50.5_rf_n4_3_fold<-(ad_rf_fit_re-ad_tda_pc_5.50.5_n4_rf_fit0_re)
diff_tda_pca_5.50.5_rf_n4_3_fold
##      Accuracy
## 1 -0.09682790
## 2 -0.09414761
## 3 -0.09595262
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_rf.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n4_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n4_3_fold$probRight
bst_tda_pca_5.50.5_rf.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_rf.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0.9908333
## 
## $winRope
## [1] 0.009166667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_rf.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n4_3_fold
## $left
## [1] 0.9999434
## 
## $rope
## [1] 1.939537e-05
## 
## $right
## [1] 3.719125e-05
# Rope Plot
plot(rope(as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold),c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold))
#bf_tda_pca_5.50.5_rf.n4_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n4_3_fold)
## t = -121.21, df = 2, p-value = 6.806e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.09903792 -0.09224751
## sample estimates:
##   mean of x 
## -0.09564271
### Test set diff
# Held-out test-set accuracy difference: baseline RF minus TDA node-4 RF.
diff_tda_pca_5.50.5_rf.n4_test<-(rf_cf_ov_acc-ad_tda_pc_5.50.5_n4_rf_cf0_ov_acc)
diff_tda_pca_5.50.5_rf.n4_test
##   Accuracy 
## 0.03163391
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_rf.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_rf.n4_test_odds.left<-bst_tda_pca_5.50.5_rf.n4_test$probLeft/bst_tda_pca_5.50.5_rf.n4_test$probRight
bst_tda_pca_5.50.5_rf.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_rf.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1563
## 
## $winRight
## [1] 0.8437
# Bayesian Correlated Test
# NOTE(review): a single observation gives the correlated t-test nothing to
# estimate spread from, hence the NA results below.
bct_tda_pca_5.50.5_rf.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n4_test))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n4_test)) #bf_tda_pca_5.50.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n4_test))

##Node5

# Random forest (caret, CV folds from fitControl) on the TDA mapper
# (PCA filter, 5 intervals / 50% overlap / 5 bins) node-5 vectorized data.
# Fix: `Importance=T, n.tree=100` are not randomForest argument names
# (`importance`, `ntree`) and were silently ignored via `...` — the printed
# summary shows 500 trees (err.rate length 1500) and a NULL importanceSD.
Adult_TDA_PC_5.50.5_n5_RfFit0 <- train(as.factor(adult_df1) ~ .,
                                       data = tda.m_adult_5.50.5.n5.vec,
                                       importance = TRUE, ntree = 100,
                                       method = "rf",
                                       trControl = fitControl,
                                       metric = "Accuracy")

# Print the fitted caret model: CV accuracy/Kappa across mtry values.
Adult_TDA_PC_5.50.5_n5_RfFit0
## Random Forest 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9602, 9603, 9603 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.9979867  0.0000000
##    55   0.9981256  0.2793028
##   108   0.9979867  0.2659022
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
# Per-fold resample accuracies for the selected model.
Adult_TDA_PC_5.50.5_n5_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.9975010 0.1418359    Fold1
## 2 0.9985420 0.3632023    Fold3
## 3 0.9983337 0.3328701    Fold2
# Keep the fold-wise accuracies (column 1) for the Bayesian comparisons below.
ad_tda_pc_5.50.5_n5_rf_fit0_re<-Adult_TDA_PC_5.50.5_n5_RfFit0$resample[1]


summary(Adult_TDA_PC_5.50.5_n5_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       14404  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           28808  matrix     numeric  
## oob.times       14404  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               14404  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
# Scaled variable importance (top 20 of 108 one-hot predictors).
varImp (Adult_TDA_PC_5.50.5_n5_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                          Overall
## V3                       100.000
## V11                       75.709
## V1                        53.069
## V13                       37.465
## V7.Other.service          12.234
## V5                         8.415
## V8.Not.in.family           7.769
## V7.Adm.clerical            7.586
## V6.Never.married           7.133
## V8.Unmarried               7.068
## V4.HS.grad                 6.558
## V9.Black                   6.524
## V4.Some.college            6.315
## V6.Divorced                5.049
## V9.White                   4.619
## V2.Federal.gov             4.384
## V2.Private                 4.010
## V8.Own.child               3.991
## V2.State.gov               3.847
## V6.Married.spouse.absent   3.597
# Predict outcome using Adult_TDA_PC_5.50.5_n5_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n5_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n5_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  2125
##      >50K       2   227
##                                           
##                Accuracy : 0.7822          
##                  95% CI : (0.7739, 0.7904)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 3.909e-08       
##                                           
##                   Kappa : 0.1391          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99973         
##             Specificity : 0.09651         
##          Pos Pred Value : 0.77723         
##          Neg Pred Value : 0.99127         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75901         
##    Detection Prevalence : 0.97656         
##       Balanced Accuracy : 0.54812         
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix — leftover.
ad_tda_pc_5.50.5_n5_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  2125
##      >50K       2   227
##                                           
##                Accuracy : 0.7822          
##                  95% CI : (0.7739, 0.7904)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 3.909e-08       
##                                           
##                   Kappa : 0.1391          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99973         
##             Specificity : 0.09651         
##          Pos Pred Value : 0.77723         
##          Neg Pred Value : 0.99127         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75901         
##    Detection Prevalence : 0.97656         
##       Balanced Accuracy : 0.54812         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.822482e-01   1.391173e-01   7.739288e-01   7.903989e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   3.908539e-08   0.000000e+00
# Keep the overall test accuracy for the test-set comparison below.
ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc<-ad_tda_pc_5.50.5_n5_rf_cf0$overall[1]
ad_tda_pc_5.50.5_n5_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99973031           0.09651361           0.77723032 
##       Neg Pred Value            Precision               Recall 
##           0.99126638           0.77723032           0.99973031 
##                   F1           Prevalence       Detection Rate 
##           0.87455028           0.75921376           0.75900901 
## Detection Prevalence    Balanced Accuracy 
##           0.97655610           0.54812196
# Keep precision/recall/F1 (byClass elements 5:7).
ad_tda_pc_5.50.5_n5_rf_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n5_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

diff_tda_pca_5.50.5_rf_n5_3_fold<-(ad_rf_fit_re-ad_tda_pc_5.50.5_n5_rf_fit0_re)
diff_tda_pca_5.50.5_rf_n5_3_fold
##     Accuracy
## 1 -0.1397753
## 2 -0.1383156
## 3 -0.1401002
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_rf.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_rf.n5_3_fold$probLeft/bst_tda_pca_5.50.5_rf.n5_3_fold$probRight
bst_tda_pca_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_rf.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.9912667
## 
## $winRope
## [1] 0.008733333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_rf.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n5_3_fold
## $left
## [1] 0.999988
## 
## $rope
## [1] 2.995516e-06
## 
## $right
## [1] 8.995439e-06
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_rf_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
#bf_tda_pca_5.50.5_rf.n5_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_rf_n5_3_fold)
## t = -254.01, df = 2, p-value = 1.55e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1417583 -0.1370358
## sample estimates:
## mean of x 
## -0.139397
### Test set diff
# Held-out test-set accuracy difference: baseline RF minus TDA node-5 RF.
diff_tda_pca_5.50.5_rf.n5_test<-(rf_cf_ov_acc-ad_tda_pc_5.50.5_n5_rf_cf0_ov_acc)
diff_tda_pca_5.50.5_rf.n5_test
##   Accuracy 
## 0.07432432
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_rf.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_rf.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_rf.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_rf.n5_test_odds.left<-bst_tda_pca_5.50.5_rf.n5_test$probLeft/bst_tda_pca_5.50.5_rf.n5_test$probRight
bst_tda_pca_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_rf.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_rf.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_rf.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1621
## 
## $winRight
## [1] 0.8379
# Bayesian Correlated Test
# NOTE(review): a single observation gives the correlated t-test nothing to
# estimate spread from, hence the NA results below.
bct_tda_pca_5.50.5_rf.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_rf.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_rf.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_rf.n5_test))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_rf.n5_test)) #bf_tda_pca_5.50.5_rf.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_rf.n5_test))

##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Random forest (caret, CV folds from fitControl) on the TDA mapper
# (KDE filter, 5 intervals / 50% overlap / 5 bins) node-1 vectorized data.
# Fix: `Importance=T, n.tree=100` are not randomForest argument names
# (`importance`, `ntree`) and were silently ignored via `...` — the printed
# summary shows 500 trees (err.rate length 1500) and a NULL importanceSD.
Adult_TDA_KDE_5.50.5_n1_RfFit0 <- train(as.factor(adult_df1) ~ .,
                                        data = tda.m_kde_adult_5.50.5.n1.vec,
                                        importance = TRUE, ntree = 100,
                                        method = "rf",
                                        trControl = fitControl,
                                        metric = "Accuracy")

# Print the fitted caret model: CV accuracy/Kappa across mtry values.
Adult_TDA_KDE_5.50.5_n1_RfFit0
## Random Forest 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8924, 8926, 8924 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.8147450  0.3981242
##    55   0.8619565  0.6270583
##   108   0.8606866  0.6263647
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
# Per-fold resample accuracies for the selected model.
Adult_TDA_KDE_5.50.5_n1_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8622003 0.6266351    Fold1
## 2 0.8559265 0.6143710    Fold3
## 3 0.8677427 0.6401687    Fold2
# Keep the fold-wise accuracies (column 1) for the Bayesian comparisons below.
ad_tda_kde_5.50.5_n1_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n1_RfFit0$resample[1]


summary(Adult_TDA_KDE_5.50.5_n1_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       13387  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           26774  matrix     numeric  
## oob.times       13387  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               13387  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
# Scaled variable importance (top 20 of 108 one-hot predictors).
varImp (Adult_TDA_KDE_5.50.5_n1_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V5                    100.000
## V11                    82.593
## V3                     81.464
## V6.Married.civ.spouse  81.001
## V1                     69.986
## V13                    45.394
## V12                    19.849
## V8.Husband             16.687
## V7.Exec.managerial     10.834
## V7.Prof.specialty      10.815
## V2.Private              7.020
## V2.Self.emp.not.inc     6.202
## V6.Never.married        5.443
## V7.Craft.repair         5.084
## V7.Sales                5.065
## V2.Self.emp.inc         4.529
## V2.Local.gov            4.141
## V8.Wife                 3.963
## V2.Federal.gov          3.505
## V7.Transport.moving     3.438
# Predict outcome using Adult_TDA_KDE_5.50.5_n1_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n1_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n1_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n1_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7146   580
##      >50K     270  1772
##                                           
##                Accuracy : 0.913           
##                  95% CI : (0.9072, 0.9185)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7508          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9636          
##             Specificity : 0.7534          
##          Pos Pred Value : 0.9249          
##          Neg Pred Value : 0.8678          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7316          
##    Detection Prevalence : 0.7910          
##       Balanced Accuracy : 0.8585          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix — leftover.
ad_tda_kde_5.50.5_n1_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7146   580
##      >50K     270  1772
##                                           
##                Accuracy : 0.913           
##                  95% CI : (0.9072, 0.9185)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7508          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9636          
##             Specificity : 0.7534          
##          Pos Pred Value : 0.9249          
##          Neg Pred Value : 0.8678          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7316          
##    Detection Prevalence : 0.7910          
##       Balanced Accuracy : 0.8585          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n1_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.129812e-01   7.507791e-01   9.072164e-01   9.184982e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   3.024336e-26
# Keep the overall test accuracy for the test-set comparison below.
ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_rf_cf0$overall[1]
ad_tda_kde_5.50.5_n1_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9635922            0.7534014            0.9249288 
##       Neg Pred Value            Precision               Recall 
##            0.8677767            0.9249288            0.9635922 
##                   F1           Prevalence       Detection Rate 
##            0.9438647            0.7592138            0.7315725 
## Detection Prevalence    Balanced Accuracy 
##            0.7909500            0.8584968
# Keep precision/recall/F1 (byClass elements 5:7).
ad_tda_kde_5.50.5_n1_rf_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

diff_tda_kde_5.50.5_rf_n1_3_fold<-(ad_rf_fit_re-ad_tda_kde_5.50.5_n1_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n1_3_fold
##       Accuracy
## 1 -0.004474596
## 2  0.004299869
## 3 -0.009509145
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_rf.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n1_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n1_3_fold$probRight
bst_tda_kde_5.50.5_rf.n1_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_kde_5.50.5_rf.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n1_3_fold
## $left
## [1] 0.1416295
## 
## $rope
## [1] 0.8059252
## 
## $right
## [1] 0.05244523
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
#bf_tda_kde_5.50.5_rf.n1_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n1_3_fold)
## t = -0.80004, df = 2, p-value = 0.5076
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02058812  0.01413220
## sample estimates:
##    mean of x 
## -0.003227958
### Test set diff
# Held-out test-set accuracy difference: baseline RF minus TDA-KDE node-1 RF.
# Negative value => the TDA-assisted model had higher test accuracy.
diff_tda_kde_5.50.5_rf.n1_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n1_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n1_test
##    Accuracy 
## -0.05640868
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_rf.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n1_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_rf.n1_test_odds.left<-bst_tda_kde_5.50.5_rf.n1_test$probLeft/bst_tda_kde_5.50.5_rf.n1_test$probRight
bst_tda_kde_5.50.5_rf.n1_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n1_test
## $winLeft
## [1] 0.8411667
## 
## $winRope
## [1] 0.1588333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): a single observation gives the correlated t-test nothing to
# estimate spread from, hence the NA results below.
bct_tda_kde_5.50.5_rf.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n1_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n1_test)) #bf_tda_kde_5.50.5_rf.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n1_test))

##Node2

# Fit a caret random forest on the Node2 TDA/KDE-mapper feature vectors,
# predicting the income class with 3-fold CV (fitControl defined earlier).
# FIX: the original call passed `Importance=T, n.tree=100`.  R argument
# matching is case-sensitive and `n.tree` is not a prefix of `ntree`, so both
# fell into `...` and were silently ignored by randomForest() (whose formals
# are `importance` and `ntree`).  Spelled correctly below so the intended
# settings actually take effect; also TRUE instead of reassignable T.
# NOTE(review): the cached output that follows was generated by the original
# call (randomForest defaults importance=FALSE, ntree=500); re-knit to refresh.
Adult_TDA_KDE_5.50.5_n2_RfFit0 <- train(as.factor(adult_df1) ~ .,
                 data = tda.m_kde_adult_5.50.5.n2.vec,
                 importance = TRUE, ntree = 100,
                 method = 'rf',
                 trControl = fitControl,
                 metric = 'Accuracy')

# Print the fitted Node2 model: CV summary and the mtry grid search results.
Adult_TDA_KDE_5.50.5_n2_RfFit0
## Random Forest 
## 
## 12638 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8425, 8425, 8426 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.7850933  0.3200908
##    55   0.8420635  0.5930775
##   108   0.8396105  0.5886620
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n2_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8402563 0.5872528    Fold1
## 2 0.8409307 0.5955220    Fold3
## 3 0.8450036 0.5964577    Fold2
# Keep the per-fold CV Accuracy column for the fold-wise comparison below.
# NOTE(review): 'KDE' is upper-case in this variable name, unlike the
# lower-case ad_tda_kde_... names used for the other nodes; the Node2 3-fold
# diff later references this upper-case name, so any rename must touch both.
ad_tda_KDE_5.50.5_n2_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n2_RfFit0$resample[1]


summary(Adult_TDA_KDE_5.50.5_n2_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       12638  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           25276  matrix     numeric  
## oob.times       12638  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               12638  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
varImp (Adult_TDA_KDE_5.50.5_n2_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V6.Married.civ.spouse 100.000
## V3                     94.813
## V1                     78.163
## V11                    76.451
## V5                     54.055
## V13                    44.953
## V8.Husband             37.629
## V12                    21.969
## V7.Exec.managerial     10.964
## V6.Never.married        8.433
## V8.Wife                 8.337
## V2.Private              7.940
## V7.Prof.specialty       7.070
## V2.Self.emp.not.inc     5.941
## V7.Sales                5.904
## V7.Craft.repair         5.350
## V4.HS.grad              4.882
## V2.Local.gov            4.593
## V2.Self.emp.inc         4.587
## V7.Adm.clerical         4.277
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7143   566
##      >50K     273  1786
##                                           
##                Accuracy : 0.9141          
##                  95% CI : (0.9084, 0.9196)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7546          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9632          
##             Specificity : 0.7594          
##          Pos Pred Value : 0.9266          
##          Neg Pred Value : 0.8674          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7313          
##    Detection Prevalence : 0.7892          
##       Balanced Accuracy : 0.8613          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix (already shown
# immediately above).
ad_tda_kde_5.50.5_n2_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7143   566
##      >50K     273  1786
##                                           
##                Accuracy : 0.9141          
##                  95% CI : (0.9084, 0.9196)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7546          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9632          
##             Specificity : 0.7594          
##          Pos Pred Value : 0.9266          
##          Neg Pred Value : 0.8674          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7313          
##    Detection Prevalence : 0.7892          
##       Balanced Accuracy : 0.8613          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n2_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.141073e-01   7.546383e-01   9.083751e-01   9.195912e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   6.706794e-24
# overall[1] is the test-set Accuracy; used in the test-set diff below.
ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_rf_cf0$overall[1]
ad_tda_kde_5.50.5_n2_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9631877            0.7593537            0.9265793 
##       Neg Pred Value            Precision               Recall 
##            0.8674114            0.9265793            0.9631877 
##                   F1           Prevalence       Detection Rate 
##            0.9445289            0.7592138            0.7312654 
## Detection Prevalence    Balanced Accuracy 
##            0.7892097            0.8612707
# byClass[5:7] = Precision, Recall, F1.
ad_tda_kde_5.50.5_n2_rf_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold CV accuracy differences: plain RF minus Node2 TDA-assisted RF.
# Positive values => the plain RF was more accurate on that fold.
# (References the upper-case 'KDE' variable assigned in the Node2 fit section.)
diff_tda_kde_5.50.5_rf_n2_3_fold<-(ad_rf_fit_re-ad_tda_KDE_5.50.5_n2_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n2_3_fold
##     Accuracy
## 1 0.01746937
## 2 0.01929570
## 3 0.01322995
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01] throughout.
bst_tda_kde_5.50.5_rf.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n2_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n2_3_fold$probRight
bst_tda_kde_5.50.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1643667
## 
## $winRight
## [1] 0.8356333
# Bayesian Correlated Test

# Second argument 0.1 is the assumed fold-to-fold correlation — TODO confirm
# against the correlatedBayesianTtest() signature.
bct_tda_kde_5.50.5_rf.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n2_3_fold
## $left
## [1] 0.002999288
## 
## $rope
## [1] 0.03937538
## 
## $right
## [1] 0.9576253
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
#bf_tda_kde_5.50.5_rf.n2_3_fold

#t_test
# Frequentist check on the same three fold-differences (df = 2, so low power).
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n2_3_fold)
## t = 9.2757, df = 2, p-value = 0.01142
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.008934758 0.024395256
## sample estimates:
##  mean of x 
## 0.01666501
### Test set diff
# Held-out test-set accuracy difference: plain RF minus Node2 TDA-assisted RF.
# Negative => the TDA-assisted classifier was more accurate on the test set.
diff_tda_kde_5.50.5_rf.n2_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n2_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n2_test
##    Accuracy 
## -0.05753481
## Bayesian Tests Test set diff

# Bayesian Sign Test

# NOTE(review): the input is a single scalar difference, so these Bayesian
# tests have very little information to work with.
bst_tda_kde_5.50.5_rf.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n2_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# probRight is 0 here, so the left/right odds ratio is Inf (division by zero).
bst_tda_kde_5.50.5_rf.n2_test_odds.left<-bst_tda_kde_5.50.5_rf.n2_test$probLeft/bst_tda_kde_5.50.5_rf.n2_test$probRight
bst_tda_kde_5.50.5_rf.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n2_test
## $winLeft
## [1] 0.8412333
## 
## $winRope
## [1] 0.1587667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# All-NA result below: presumably the correlated t-test cannot estimate a
# variance from a single observation — TODO confirm.
bct_tda_kde_5.50.5_rf.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n2_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n2_test)) #bf_tda_kde_5.50.5_rf.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n2_test))

##Node3

# Fit a caret random forest on the Node3 TDA/KDE-mapper feature vectors,
# predicting the income class with 3-fold CV (fitControl defined earlier).
# FIX: the original call passed `Importance=T, n.tree=100`.  R argument
# matching is case-sensitive and `n.tree` is not a prefix of `ntree`, so both
# fell into `...` and were silently ignored by randomForest() (whose formals
# are `importance` and `ntree`).  Spelled correctly below so the intended
# settings actually take effect; also TRUE instead of reassignable T.
# NOTE(review): the cached output that follows was generated by the original
# call (randomForest defaults importance=FALSE, ntree=500); re-knit to refresh.
Adult_TDA_KDE_5.50.5_n3_RfFit0 <- train(as.factor(adult_df1) ~ .,
                 data = tda.m_kde_adult_5.50.5.n3.vec,
                 importance = TRUE, ntree = 100,
                 method = 'rf',
                 trControl = fitControl,
                 metric = 'Accuracy')

# Print the fitted Node3 model: CV summary and the mtry grid search results.
Adult_TDA_KDE_5.50.5_n3_RfFit0
## Random Forest 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7756, 7756, 7756 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.7686092  0.2314905
##    55   0.8347946  0.5710963
##   108   0.8309266  0.5625681
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n3_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8300670 0.5574324    Fold1
## 2 0.8321300 0.5667173    Fold3
## 3 0.8421867 0.5891393    Fold2
# Keep the per-fold CV Accuracy column for the fold-wise comparison below.
ad_tda_kde_5.50.5_n3_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n3_RfFit0$resample[1]


summary(Adult_TDA_KDE_5.50.5_n3_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       11634  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           23268  matrix     numeric  
## oob.times       11634  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               11634  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
varImp (Adult_TDA_KDE_5.50.5_n3_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V3                    100.000
## V6.Married.civ.spouse  97.792
## V1                     73.869
## V11                    63.571
## V13                    43.473
## V5                     35.861
## V8.Husband             35.502
## V12                    21.177
## V6.Never.married       10.870
## V7.Exec.managerial      9.354
## V8.Wife                 7.976
## V2.Private              7.408
## V2.Self.emp.not.inc     6.442
## V7.Sales                6.073
## V7.Prof.specialty       6.039
## V7.Craft.repair         5.629
## V4.Bachelors            4.988
## V4.HS.grad              4.750
## V9.White                4.219
## V7.Adm.clerical         4.153
# Predict outcome using Adult_TDA_KDE_5.50.5_n3_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n3_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n3_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7110   551
##      >50K     306  1801
##                                           
##                Accuracy : 0.9123          
##                  95% CI : (0.9065, 0.9178)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7512          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9587          
##             Specificity : 0.7657          
##          Pos Pred Value : 0.9281          
##          Neg Pred Value : 0.8548          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7279          
##    Detection Prevalence : 0.7843          
##       Balanced Accuracy : 0.8622          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix (already shown
# immediately above).
ad_tda_kde_5.50.5_n3_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7110   551
##      >50K     306  1801
##                                           
##                Accuracy : 0.9123          
##                  95% CI : (0.9065, 0.9178)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.7512          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9587          
##             Specificity : 0.7657          
##          Pos Pred Value : 0.9281          
##          Neg Pred Value : 0.8548          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7279          
##    Detection Prevalence : 0.7843          
##       Balanced Accuracy : 0.8622          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n3_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.122645e-01   7.511850e-01   9.064793e-01   9.178026e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   0.000000e+00   7.757603e-17
# overall[1] is the test-set Accuracy; used in the test-set diff below.
ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc<-ad_tda_kde_5.50.5_n3_rf_cf0$overall[1]
ad_tda_kde_5.50.5_n3_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9587379            0.7657313            0.9280773 
##       Neg Pred Value            Precision               Recall 
##            0.8547698            0.9280773            0.9587379 
##                   F1           Prevalence       Detection Rate 
##            0.9431585            0.7592138            0.7278870 
## Detection Prevalence    Balanced Accuracy 
##            0.7842957            0.8622346
# byClass[5:7] = Precision, Recall, F1.
ad_tda_kde_5.50.5_n3_rf_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n3_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold CV accuracy differences: plain RF minus Node3 TDA-assisted RF.
# Positive values => the plain RF was more accurate on that fold.
diff_tda_kde_5.50.5_rf_n3_3_fold<-(ad_rf_fit_re-ad_tda_kde_5.50.5_n3_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n3_3_fold
##     Accuracy
## 1 0.02765867
## 2 0.02809641
## 3 0.01604682
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01] throughout.
bst_tda_kde_5.50.5_rf.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n3_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n3_3_fold$probRight
bst_tda_kde_5.50.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.03763333
## 
## $winRight
## [1] 0.9623667
# Bayesian Correlated Test

bct_tda_kde_5.50.5_rf.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n3_3_fold
## $left
## [1] 0.008776313
## 
## $rope
## [1] 0.03739373
## 
## $right
## [1] 0.95383
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
#bf_tda_kde_5.50.5_rf.n3_3_fold

#t_test
# Frequentist check on the same three fold-differences (df = 2, so low power).
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n3_3_fold)
## t = 6.066, df = 2, p-value = 0.02612
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.006957428 0.040910507
## sample estimates:
##  mean of x 
## 0.02393397
### Test set diff
# Held-out test-set accuracy difference: plain RF minus Node3 TDA-assisted RF.
# Negative => the TDA-assisted classifier was more accurate on the test set.
diff_tda_kde_5.50.5_rf.n3_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n3_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n3_test
##    Accuracy 
## -0.05569206
## Bayesian Tests Test set diff

# Bayesian Sign Test

# NOTE(review): the input is a single scalar difference, so these Bayesian
# tests have very little information to work with.
bst_tda_kde_5.50.5_rf.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n3_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# probRight is 0 here, so the left/right odds ratio is Inf (division by zero).
bst_tda_kde_5.50.5_rf.n3_test_odds.left<-bst_tda_kde_5.50.5_rf.n3_test$probLeft/bst_tda_kde_5.50.5_rf.n3_test$probRight
bst_tda_kde_5.50.5_rf.n3_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n3_test
## $winLeft
## [1] 0.8413667
## 
## $winRope
## [1] 0.1586333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# All-NA result below: presumably the correlated t-test cannot estimate a
# variance from a single observation — TODO confirm.
bct_tda_kde_5.50.5_rf.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n3_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n3_test)) #bf_tda_kde_5.50.5_rf.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n3_test))

##Node4

# Fit a caret random forest on the Node4 TDA/KDE-mapper feature vectors,
# predicting the income class with 3-fold CV (fitControl defined earlier).
# FIX: the original call passed `Importance=T, n.tree=100`.  R argument
# matching is case-sensitive and `n.tree` is not a prefix of `ntree`, so both
# fell into `...` and were silently ignored by randomForest() (whose formals
# are `importance` and `ntree`).  Spelled correctly below so the intended
# settings actually take effect; also TRUE instead of reassignable T.
# NOTE(review): the cached output that follows was generated by the original
# call (randomForest defaults importance=FALSE, ntree=500); re-knit to refresh.
Adult_TDA_KDE_5.50.5_n4_RfFit0 <- train(as.factor(adult_df1) ~ .,
                 data = tda.m_kde_adult_5.50.5.n4.vec,
                 importance = TRUE, ntree = 100,
                 method = 'rf',
                 trControl = fitControl,
                 metric = 'Accuracy')

# Print the fitted Node4 model: CV summary and the mtry grid search results.
Adult_TDA_KDE_5.50.5_n4_RfFit0
## Random Forest 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6692, 6692, 6692 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa     
##     2   0.7965730  0.04215066
##    55   0.8508667  0.51856797
##   108   0.8488743  0.51302439
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n4_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8493724 0.5058215    Fold1
## 2 0.8478781 0.5128345    Fold3
## 3 0.8553497 0.5370479    Fold2
# Keep the per-fold CV Accuracy column for the fold-wise comparison below.
ad_tda_kde_5.50.5_n4_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n4_RfFit0$resample[1]


summary(Adult_TDA_KDE_5.50.5_n4_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted       10038  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           20076  matrix     numeric  
## oob.times       10038  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y               10038  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
varImp (Adult_TDA_KDE_5.50.5_n4_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V3                    100.000
## V6.Married.civ.spouse  86.929
## V1                     68.178
## V11                    54.468
## V13                    39.826
## V8.Husband             38.618
## V5                     20.141
## V12                    19.266
## V4.Bachelors           10.960
## V6.Never.married        9.170
## V7.Exec.managerial      7.847
## V2.Private              6.771
## V7.Craft.repair         6.460
## V7.Sales                6.382
## V2.Self.emp.not.inc     6.056
## V8.Wife                 5.972
## V7.Prof.specialty       5.815
## V7.Adm.clerical         4.797
## V7.Transport.moving     4.472
## V7.Tech.support         4.455
# Predict outcome using Adult_TDA_KDE_5.50.5_n4_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n4_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n4_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6992   686
##      >50K     424  1666
##                                           
##                Accuracy : 0.8864          
##                  95% CI : (0.8799, 0.8926)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6769          
##                                           
##  Mcnemar's Test P-Value : 4.729e-15       
##                                           
##             Sensitivity : 0.9428          
##             Specificity : 0.7083          
##          Pos Pred Value : 0.9107          
##          Neg Pred Value : 0.7971          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7158          
##    Detection Prevalence : 0.7860          
##       Balanced Accuracy : 0.8256          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix (already shown
# immediately above).
ad_tda_kde_5.50.5_n4_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6992   686
##      >50K     424  1666
##                                           
##                Accuracy : 0.8864          
##                  95% CI : (0.8799, 0.8926)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.6769          
##                                           
##  Mcnemar's Test P-Value : 4.729e-15       
##                                           
##             Sensitivity : 0.9428          
##             Specificity : 0.7083          
##          Pos Pred Value : 0.9107          
##          Neg Pred Value : 0.7971          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7158          
##    Detection Prevalence : 0.7860          
##       Balanced Accuracy : 0.8256          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.863636e-01   6.769042e-01   8.799032e-01   8.925927e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  1.213505e-222   4.728975e-15
# overall[1] is the test-set Accuracy; used in the test-set diff below.
ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc<-ad_tda_kde_5.50.5_n4_rf_cf0$overall[1]
ad_tda_kde_5.50.5_n4_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9428263            0.7083333            0.9106538 
##       Neg Pred Value            Precision               Recall 
##            0.7971292            0.9106538            0.9428263 
##                   F1           Prevalence       Detection Rate 
##            0.9264608            0.7592138            0.7158067 
## Detection Prevalence    Balanced Accuracy 
##            0.7860360            0.8255798
# byClass[5:7] = Precision, Recall, F1.
ad_tda_kde_5.50.5_n4_rf_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n4_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold CV accuracy differences: plain RF minus Node4 TDA-assisted RF.
# Positive values => the plain RF was more accurate on that fold.
diff_tda_kde_5.50.5_rf_n4_3_fold<-(ad_rf_fit_re-ad_tda_kde_5.50.5_n4_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n4_3_fold
##      Accuracy
## 1 0.008353332
## 2 0.012348312
## 3 0.002883842
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01] throughout.
bst_tda_kde_5.50.5_rf.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n4_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n4_3_fold$probRight
bst_tda_kde_5.50.5_rf.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.8423
## 
## $winRight
## [1] 0.1577
# Bayesian Correlated Test

bct_tda_kde_5.50.5_rf.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n4_3_fold
## $left
## [1] 0.01501928
## 
## $rope
## [1] 0.7003601
## 
## $right
## [1] 0.2846207
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n4_3_fold,c(-0.01,0.01)))

### Test set diff
# Held-out test-set accuracy difference: plain RF minus Node4 TDA-assisted RF.
# Negative => the TDA-assisted classifier was more accurate on the test set.
diff_tda_kde_5.50.5_rf.n4_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n4_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n4_test
##    Accuracy 
## -0.02979115
## Bayesian Tests Test set diff

# Bayesian Sign Test

# NOTE(review): the input is a single scalar difference, so these Bayesian
# tests have very little information to work with.
bst_tda_kde_5.50.5_rf.n4_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n4_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n4_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# NOTE(review): the commented BayesFactor block and the t-test just below
# operate on the 3-fold diff and appear to have been pasted into the middle
# of this test-set section; in the other node sections they follow the
# 3-fold rope plot instead.
#BayesFactor
#bf_tda_kde_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
#bf_tda_kde_5.50.5_rf.n4_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n4_3_fold)
## t = 2.8659, df = 2, p-value = 0.1032
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003941154  0.019664811
## sample estimates:
##   mean of x 
## 0.007861829
# probRight is 0 here, so the left/right odds ratio is Inf (division by zero).
bst_tda_kde_5.50.5_rf.n4_test_odds.left<-bst_tda_kde_5.50.5_rf.n4_test$probLeft/bst_tda_kde_5.50.5_rf.n4_test$probRight
bst_tda_kde_5.50.5_rf.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n4_test
## $winLeft
## [1] 0.841
## 
## $winRope
## [1] 0.159
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# All-NA result below: presumably the correlated t-test cannot estimate a
# variance from a single observation — TODO confirm.
bct_tda_kde_5.50.5_rf.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n4_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n4_test)) #bf_tda_kde_5.50.5_rf.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n4_test))

##Node5

# Fit a random forest (via caret) on the TDA-KDE node-5 vectorized training data.
# BUG FIX(review): the original call passed `Importance=T, n.tree=100`.
# randomForest's actual arguments are `importance` and `ntree`; argument
# matching is case-sensitive and `n.tree` is not a prefix of `ntree`, so both
# were silently swallowed by `...` and the defaults (importance = FALSE,
# ntree = 500) were used instead -- consistent with the recorded
# `err.rate` length of 1500 (= 500 trees x 3 columns) in the summary below.
# Corrected to the intended arguments; re-knitting will refresh the outputs.
Adult_TDA_KDE_5.50.5_n5_RfFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n5.vec,
                                        importance = TRUE, ntree = 100,
                                        method = 'rf',
                                        trControl = fitControl,
                                        metric = 'Accuracy')

# Print the fitted caret object: resampling setup and tuning results.
Adult_TDA_KDE_5.50.5_n5_RfFit0
## Random Forest 
## 
## 7540 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5028, 5026, 5026 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##     2   0.8461539  0.0000000
##    55   0.8704258  0.4170561
##   108   0.8665802  0.4107397
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 55.
Adult_TDA_KDE_5.50.5_n5_RfFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8757962 0.4321815    Fold1
## 2 0.8687351 0.4186501    Fold3
## 3 0.8667462 0.4003366    Fold2
# Keep only the per-fold Accuracy column for the paired fold-wise
# comparisons against the non-TDA classifier further below.
ad_tda_kde_5.50.5_n5_rf_fit0_re<-Adult_TDA_KDE_5.50.5_n5_RfFit0$resample[1]


summary(Adult_TDA_KDE_5.50.5_n5_RfFit0)
##                 Length Class      Mode     
## call                6  -none-     call     
## type                1  -none-     character
## predicted        7540  factor     numeric  
## err.rate         1500  -none-     numeric  
## confusion           6  -none-     numeric  
## votes           15080  matrix     numeric  
## oob.times        7540  -none-     numeric  
## classes             2  -none-     character
## importance        108  -none-     numeric  
## importanceSD        0  -none-     NULL     
## localImportance     0  -none-     NULL     
## proximity           0  -none-     NULL     
## ntree               1  -none-     numeric  
## mtry                1  -none-     numeric  
## forest             14  -none-     list     
## y                7540  factor     numeric  
## test                0  -none-     NULL     
## inbag               0  -none-     NULL     
## xNames            108  -none-     character
## problemType         1  -none-     character
## tuneValue           1  data.frame list     
## obsLevels           2  -none-     character
## param               2  -none-     list
# Variable-importance ranking from the fitted forest (top 20 shown).
varImp (Adult_TDA_KDE_5.50.5_n5_RfFit0)
## rf variable importance
## 
##   only 20 most important variables shown (out of 108)
## 
##                       Overall
## V3                    100.000
## V1                     62.634
## V6.Married.civ.spouse  54.512
## V11                    53.340
## V13                    37.313
## V8.Husband             22.589
## V12                    18.432
## V7.Exec.managerial      7.125
## V7.Craft.repair         6.959
## V2.Private              6.085
## V5                      5.980
## V7.Sales                5.951
## V7.Prof.specialty       5.280
## V7.Adm.clerical         5.005
## V7.Machine.op.inspct    4.864
## V6.Never.married        4.688
## V7.Transport.moving     4.612
## V2.Self.emp.not.inc     4.596
## V7.Tech.support         4.516
## V4.HS.grad              4.323
# Predict outcome using Adult_TDA_KDE_5.50.5_n5_RfFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n5_RfFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n5_rf_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n5_rf_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6679   772
##      >50K     737  1580
##                                           
##                Accuracy : 0.8455          
##                  95% CI : (0.8382, 0.8526)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5753          
##                                           
##  Mcnemar's Test P-Value : 0.3814          
##                                           
##             Sensitivity : 0.9006          
##             Specificity : 0.6718          
##          Pos Pred Value : 0.8964          
##          Neg Pred Value : 0.6819          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6838          
##    Detection Prevalence : 0.7628          
##       Balanced Accuracy : 0.7862          
##                                           
##        'Positive' Class :  <=50K          
## 
# (Duplicate print of the same confusion matrix as above.)
ad_tda_kde_5.50.5_n5_rf_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6679   772
##      >50K     737  1580
##                                           
##                Accuracy : 0.8455          
##                  95% CI : (0.8382, 0.8526)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5753          
##                                           
##  Mcnemar's Test P-Value : 0.3814          
##                                           
##             Sensitivity : 0.9006          
##             Specificity : 0.6718          
##          Pos Pred Value : 0.8964          
##          Neg Pred Value : 0.6819          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6838          
##    Detection Prevalence : 0.7628          
##       Balanced Accuracy : 0.7862          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n5_rf_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.455160e-01   5.753120e-01   8.381947e-01   8.526305e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.272474e-97   3.814355e-01
# Extract the overall test-set accuracy (element 1 of $overall).
ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc<-ad_tda_kde_5.50.5_n5_rf_cf0$overall[1]
ad_tda_kde_5.50.5_n5_rf_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9006203            0.6717687            0.8963897 
##       Neg Pred Value            Precision               Recall 
##            0.6819163            0.8963897            0.9006203 
##                   F1           Prevalence       Detection Rate 
##            0.8985000            0.7592138            0.6837633 
## Detection Prevalence    Balanced Accuracy 
##            0.7627969            0.7861945
# Elements 5:7 of $byClass are Precision, Recall, F1.
ad_tda_kde_5.50.5_n5_rf_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n5_rf_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff
# Per-fold accuracy differences: baseline RF minus TDA-assisted node-5 RF.
# Negative values favor the TDA-assisted model.

diff_tda_kde_5.50.5_rf_n5_3_fold<-(ad_rf_fit_re-ad_tda_kde_5.50.5_n5_rf_fit0_re)
diff_tda_kde_5.50.5_rf_n5_3_fold
##       Accuracy
## 1 -0.018070461
## 2 -0.008508708
## 3 -0.008512708
## Bayesian Tests 3-fold diff
# All tests below use ROPE = [-0.01, 0.01] on the accuracy differences.

# Bayesian Sign Test

bst_tda_kde_5.50.5_rf.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n5_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_rf.n5_3_fold$probLeft/bst_tda_kde_5.50.5_rf.n5_3_fold$probRight
bst_tda_kde_5.50.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n5_3_fold
## $winLeft
## [1] 0.3229333
## 
## $winRope
## [1] 0.6770667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_kde_5.50.5_rf.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n5_3_fold
## $left
## [1] 0.6550469
## 
## $rope
## [1] 0.3311655
## 
## $right
## [1] 0.01378764
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_rf_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold))
#bf_tda_kde_5.50.5_rf.n5_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_rf_n5_3_fold)
## t = -3.6708, df = 2, p-value = 0.06686
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.025408059  0.002013474
## sample estimates:
##   mean of x 
## -0.01169729
### Test set diff
# Single-number difference on held-out test accuracy.
diff_tda_kde_5.50.5_rf.n5_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n5_rf_cf0_ov_acc)
diff_tda_kde_5.50.5_rf.n5_test
##   Accuracy 
## 0.01105651
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_rf.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_rf.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_rf.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_rf.n5_test_odds.left<-bst_tda_kde_5.50.5_rf.n5_test$probLeft/bst_tda_kde_5.50.5_rf.n5_test$probRight
bst_tda_kde_5.50.5_rf.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_rf.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_rf.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_rf.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4613333
## 
## $winRight
## [1] 0.5386667
# Bayesian Correlated Test
# NOTE(review): undefined for a single difference -- NA results below.

bct_tda_kde_5.50.5_rf.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_rf.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_rf.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_rf.n5_test))

#BayesFactor
#bf_tda_kde_5.50.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_rf.n5_test)) #bf_tda_kde_5.50.5_rf.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_rf.n5_test))

##Non-TDA-Assisted
#Support Vector Machine-Radial Basis 
# Baseline: radial-basis SVM (caret 'svmRadial') on the full one-hot-encoded
# training set, 3-fold CV via fitControl, tuned on Accuracy.
adultSvmFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15196, 15195 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.8125752  0.3347893
##   0.50  0.8143740  0.3452636
##   1.00  0.8151200  0.3663336
## 
## Tuning parameter 'sigma' was held constant at a value of 0.009491164
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.009491164 and C = 1.
adultSvmFit$resample
##    Accuracy      Kappa Resample
## 1 0.8442806 0.53298607    Fold2
## 2 0.7540142 0.02953679    Fold1
## 3 0.8470650 0.53647790    Fold3
# NOTE(review): Fold1 accuracy (0.754) with near-zero Kappa suggests that fold
# degenerated to majority-class prediction -- worth confirming on re-run.
# Keep the per-fold Accuracy column for the paired comparisons below.
ad_svm_fit_re<-adultSvmFit$resample[1]

summary(adultSvmFit)
## Length  Class   Mode 
##      1   ksvm     S4
#varImp (adultSvmFit)

# Predict outcome using model from training data based on testing data
predictions <- predict(adultSvmFit, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
svm_cf<-confusionMatrix(data=predictions, as.factor(adult.one_hot_df4Test$adult_df1))
svm_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6997  1036
##      >50K     419  1316
##                                           
##                Accuracy : 0.851           
##                  95% CI : (0.8438, 0.8581)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5525          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9435          
##             Specificity : 0.5595          
##          Pos Pred Value : 0.8710          
##          Neg Pred Value : 0.7585          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7163          
##    Detection Prevalence : 0.8224          
##       Balanced Accuracy : 0.7515          
##                                           
##        'Positive' Class :  <=50K          
## 
svm_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.510442e-01   5.525103e-01   8.438277e-01   8.580507e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.493502e-111   1.151688e-58
# Overall test-set accuracy of the baseline SVM.
svm_cf_ov_acc<-svm_cf$overall[1]
svm_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9435005            0.5595238            0.8710320 
##       Neg Pred Value            Precision               Recall 
##            0.7585014            0.8710320            0.9435005 
##                   F1           Prevalence       Detection Rate 
##            0.9058191            0.7592138            0.7163186 
## Detection Prevalence    Balanced Accuracy 
##            0.8223792            0.7515122
# Elements 5:7 of $byClass are Precision, Recall, F1.
svm_cf_pr_rec_f1<-svm_cf$byClass[5:7]

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Fit a radial-basis SVM (via caret) on the TDA PCA-filter node-1 vectorized data.
# BUG FIX(review): the original fit used `tda.m_adult_5.50.5.n2.vec` -- the
# node-2 data -- so "Node1" and "Node2" trained on identical inputs (both
# recorded fits report 12206 samples). Following the naming pattern of the
# other nodes (e.g. n3 -> tda.m_adult_5.50.5.n3.vec), node 1 should use the
# n1 vector. The recorded output below still reflects the old (n2) fit until
# the document is re-knit.
Adult_TDA_PC_5.50.5_n1_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n1.vec,
                                        method = 'svmRadial',
                                        trControl = fitControl,
                                        metric = 'Accuracy')
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted node-1 SVM: resampling setup and tuning results.
Adult_TDA_PC_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8138, 8137, 8137 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa     
##   0.25  0.5604626  0.07396875
##   0.50  0.5589061  0.08534571
##   1.00  0.5590698  0.08738347
## 
## Tuning parameter 'sigma' was held constant at a value of 1.958979e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.958979e-09 and C = 0.25.
Adult_TDA_PC_5.50.5_n1_SvmFit0$resample
##    Accuracy      Kappa Resample
## 1 0.5583681 0.07058218    Fold3
## 2 0.5561563 0.06156455    Fold2
## 3 0.5668633 0.08975953    Fold1
# Keep the per-fold Accuracy column for the paired comparisons below.
ad_tda_pc_5.50.5_n1_svm_fit_re<-Adult_TDA_PC_5.50.5_n1_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.50.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_PC_5.50.5_n1_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n1_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1158   283
##      >50K    6258  2069
##                                          
##                Accuracy : 0.3304         
##                  95% CI : (0.321, 0.3398)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.0192         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.1561         
##             Specificity : 0.8797         
##          Pos Pred Value : 0.8036         
##          Neg Pred Value : 0.2485         
##              Prevalence : 0.7592         
##          Detection Rate : 0.1186         
##    Detection Prevalence : 0.1475         
##       Balanced Accuracy : 0.5179         
##                                          
##        'Positive' Class :  <=50K         
## 
# (Duplicate print of the same confusion matrix as above.)
ad_tda_pc_5.50.5_n1_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1158   283
##      >50K    6258  2069
##                                          
##                Accuracy : 0.3304         
##                  95% CI : (0.321, 0.3398)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.0192         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.1561         
##             Specificity : 0.8797         
##          Pos Pred Value : 0.8036         
##          Neg Pred Value : 0.2485         
##              Prevalence : 0.7592         
##          Detection Rate : 0.1186         
##    Detection Prevalence : 0.1475         
##       Balanced Accuracy : 0.5179         
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_pc_5.50.5_n1_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.33036446     0.01918533     0.32103757     0.33979259     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Overall test-set accuracy of the node-1 SVM.
ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_svm_cf0$overall[1]
ad_tda_pc_5.50.5_n1_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.1561489            0.8796769            0.8036086 
##       Neg Pred Value            Precision               Recall 
##            0.2484688            0.8036086            0.1561489 
##                   F1           Prevalence       Detection Rate 
##            0.2614881            0.7592138            0.1185504 
## Detection Prevalence    Balanced Accuracy 
##            0.1475225            0.5179129
# Elements 5:7 of $byClass are Precision, Recall, F1.
ad_tda_pc_5.50.5_n1_svm_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff
# Per-fold accuracy differences: baseline SVM minus TDA-assisted node-1 SVM.
# Positive values favor the baseline model. ROPE = [-0.01, 0.01] throughout.

diff_tda_pca_5.50.5_svm_n1_3_fold<-(ad_svm_fit_re - ad_tda_pc_5.50.5_n1_svm_fit_re)
diff_tda_pca_5.50.5_svm_n1_3_fold
##    Accuracy
## 1 0.2859125
## 2 0.1978579
## 3 0.2802017
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_svm.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_svm.n1_3_fold$probLeft/bst_tda_pca_5.50.5_svm.n1_3_fold$probRight
bst_tda_pca_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0089
## 
## $winRight
## [1] 0.9911
# Bayesian Correlated Test

bct_tda_pca_5.50.5_svm.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n1_3_fold
## $left
## [1] 0.007528936
## 
## $rope
## [1] 0.001247696
## 
## $right
## [1] 0.9912234
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
#bf_tda_pca_5.50.5_rf.n1_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n1_3_fold)
## t = 8.9518, df = 2, p-value = 0.01225
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1322575 0.3770572
## sample estimates:
## mean of x 
## 0.2546574
### Test set diff
# Single-number difference on held-out test accuracy.
diff_tda_pca_5.50.5_svm.n1_test<-(svm_cf_ov_acc - ad_tda_pc_5.50.5_n1_svm_cf0_ov_acc)
diff_tda_pca_5.50.5_svm.n1_test
##  Accuracy 
## 0.5206798
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_svm.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n1_test_odds.left<-bst_tda_pca_5.50.5_svm.n1_test$probLeft/bst_tda_pca_5.50.5_svm.n1_test$probRight
bst_tda_pca_5.50.5_svm.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1577333
## 
## $winRight
## [1] 0.8422667
# Bayesian Correlated Test
# NOTE(review): undefined for a single difference -- NA results below.

bct_tda_pca_5.50.5_svm.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n1_test))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n1_test)) #bf_tda_pca_5.50.5_svm.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Fit a radial-basis SVM (via caret) on the TDA PCA-filter node-2 vectorized
# data; 3-fold CV via fitControl, tuned on Accuracy.
Adult_TDA_PC_5.50.5_n2_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8136, 8138, 8138 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa     
##   0.25  0.5641481  0.07100743
##   0.50  0.5642307  0.08915954
##   1.00  0.5620176  0.10430376
## 
## Tuning parameter 'sigma' was held constant at a value of 1.725656e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.725656e-09 and C = 0.5.
Adult_TDA_PC_5.50.5_n2_SvmFit0$resample
##    Accuracy      Kappa Resample
## 1 0.5643735 0.07702904    Fold1
## 2 0.5629302 0.08647680    Fold3
## 3 0.5653884 0.10397280    Fold2
# Keep the per-fold Accuracy column for the paired comparisons below.
ad_tda_pc_5.50.5_n2_svm_fit_re<-Adult_TDA_PC_5.50.5_n2_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.50.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_PC_5.50.5_n2_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n2_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   2074   516
##      >50K    5342  1836
##                                           
##                Accuracy : 0.4003          
##                  95% CI : (0.3906, 0.4101)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0354          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2797          
##             Specificity : 0.7806          
##          Pos Pred Value : 0.8008          
##          Neg Pred Value : 0.2558          
##              Prevalence : 0.7592          
##          Detection Rate : 0.2123          
##    Detection Prevalence : 0.2652          
##       Balanced Accuracy : 0.5301          
##                                           
##        'Positive' Class :  <=50K          
## 
# (Duplicate print of the same confusion matrix as above.)
ad_tda_pc_5.50.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   2074   516
##      >50K    5342  1836
##                                           
##                Accuracy : 0.4003          
##                  95% CI : (0.3906, 0.4101)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0354          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2797          
##             Specificity : 0.7806          
##          Pos Pred Value : 0.8008          
##          Neg Pred Value : 0.2558          
##              Prevalence : 0.7592          
##          Detection Rate : 0.2123          
##    Detection Prevalence : 0.2652          
##       Balanced Accuracy : 0.5301          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.40028665     0.03544592     0.39055006     0.41008273     0.75921376 
## AccuracyPValue  McnemarPValue 
##     1.00000000     0.00000000
# Overall test-set accuracy of the node-2 SVM.
ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_svm_cf0$overall[1]
ad_tda_pc_5.50.5_n2_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2796656            0.7806122            0.8007722 
##       Neg Pred Value            Precision               Recall 
##            0.2557816            0.8007722            0.2796656 
##                   F1           Prevalence       Detection Rate 
##            0.4145513            0.7592138            0.2123260 
## Detection Prevalence    Balanced Accuracy 
##            0.2651515            0.5301389
# Elements 5:7 of $byClass are Precision, Recall, F1.
ad_tda_pc_5.50.5_n2_svm_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff
# Per-fold accuracy differences: baseline SVM minus TDA-assisted node-2 SVM.
# Positive values favor the baseline model. ROPE = [-0.01, 0.01] throughout.

diff_tda_pca_5.50.5_svm_n2_3_fold<-(ad_svm_fit_re - ad_tda_pc_5.50.5_n2_svm_fit_re)
diff_tda_pca_5.50.5_svm_n2_3_fold
##    Accuracy
## 1 0.2799072
## 2 0.1910840
## 3 0.2816766
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_svm.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_svm.n2_3_fold$probLeft/bst_tda_pca_5.50.5_svm.n2_3_fold$probRight
bst_tda_pca_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.01056667
## 
## $winRight
## [1] 0.9894333
# Bayesian Correlated Test

bct_tda_pca_5.50.5_svm.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n2_3_fold
## $left
## [1] 0.008537027
## 
## $rope
## [1] 0.001432547
## 
## $right
## [1] 0.9900304
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
#bf_tda_pca_5.50.5_rf.n2_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n2_3_fold)
## t = 8.389, df = 2, p-value = 0.01391
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1222099 0.3795686
## sample estimates:
## mean of x 
## 0.2508893
### Test set diff
# Single-number difference on held-out test accuracy.
diff_tda_pca_5.50.5_svm.n2_test<-(svm_cf_ov_acc - ad_tda_pc_5.50.5_n2_svm_cf0_ov_acc)
diff_tda_pca_5.50.5_svm.n2_test
##  Accuracy 
## 0.4507576
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_svm.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n2_test_odds.left<-bst_tda_pca_5.50.5_svm.n2_test$probLeft/bst_tda_pca_5.50.5_svm.n2_test$probRight
bst_tda_pca_5.50.5_svm.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1614667
## 
## $winRight
## [1] 0.8385333
# Bayesian Correlated Test
# NOTE(review): undefined for a single difference -- NA results below.

bct_tda_pca_5.50.5_svm.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_svm.n2_test))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n2_test)) #bf_tda_pca_5.50.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n2_test))

##Node3
# Train an RBF-kernel SVM (method 'svmRadial') on the TDA PCA-filter node-3
# data with 3-fold CV (fitControl defined earlier), selecting on Accuracy.
# NOTE(review): train()/confusionMatrix() are presumably caret functions
# attached outside this excerpt -- confirm.

Adult_TDA_PC_5.50.5_n3_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n3.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
# kernlab warns because some one-hot columns are constant within this node's
# rows and cannot be scaled.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8827, 8826 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.7980364  0.1721521
##   0.50  0.8018128  0.1970827
##   1.00  0.8021150  0.2059550
## 
## Tuning parameter 'sigma' was held constant at a value of 1.794281e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.794281e-09 and C = 1.
# Per-fold CV Accuracy/Kappa for the selected model
Adult_TDA_PC_5.50.5_n3_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8012690 0.2004885    Fold2
## 2 0.8051212 0.2199626    Fold1
## 3 0.7999547 0.1974139    Fold3
# Keep only the per-fold Accuracy column for the Bayesian comparisons below
ad_tda_pc_5.50.5_n3_svm_fit_re<-Adult_TDA_PC_5.50.5_n3_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.50.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_PC_5.50.5_n3_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n3_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7389  1952
##      >50K      27   400
##                                           
##                Accuracy : 0.7974          
##                  95% CI : (0.7893, 0.8053)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.231           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9964          
##             Specificity : 0.1701          
##          Pos Pred Value : 0.7910          
##          Neg Pred Value : 0.9368          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7564          
##    Detection Prevalence : 0.9563          
##       Balanced Accuracy : 0.5832          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the confusion matrix is printed twice in the recorded run.
ad_tda_pc_5.50.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7389  1952
##      >50K      27   400
##                                           
##                Accuracy : 0.7974          
##                  95% CI : (0.7893, 0.8053)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.231           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9964          
##             Specificity : 0.1701          
##          Pos Pred Value : 0.7910          
##          Neg Pred Value : 0.9368          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7564          
##    Detection Prevalence : 0.9563          
##       Balanced Accuracy : 0.5832          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n3_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.973997e-01   2.309691e-01   7.892895e-01   8.053321e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.281964e-19   0.000000e+00
# Overall test-set accuracy, reused in the test-set difference below
ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc<-ad_tda_pc_5.50.5_n3_svm_cf0$overall[1]
ad_tda_pc_5.50.5_n3_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9963592            0.1700680            0.7910288 
##       Neg Pred Value            Precision               Recall 
##            0.9367681            0.7910288            0.9963592 
##                   F1           Prevalence       Detection Rate 
##            0.8819001            0.7592138            0.7564496 
## Detection Prevalence    Balanced Accuracy 
##            0.9562858            0.5832136
# Precision / Recall / F1 for the positive class (elements 5:7 of byClass)
ad_tda_pc_5.50.5_n3_svm_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n3_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted
###### SVM classifiers. NOTE(review): comment originally said "RF" but the
###### code compares the SVM fits.

### 3-fold diff: baseline SVM per-fold accuracy minus TDA node-3 SVM per-fold accuracy

diff_tda_pca_5.50.5_svm_n3_3_fold<-(ad_svm_fit_re - ad_tda_pc_5.50.5_n3_svm_fit_re)
diff_tda_pca_5.50.5_svm_n3_3_fold
##      Accuracy
## 1  0.04301166
## 2 -0.05110702
## 3  0.04711033
## Bayesian Tests 3-fold diff

# Bayesian Sign Test, ROPE = [-0.01, 0.01]

bst_tda_pca_5.50.5_svm.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_svm.n3_3_fold$probLeft/bst_tda_pca_5.50.5_svm.n3_3_fold$probRight
bst_tda_pca_5.50.5_svm.n3_3_fold_odds.left
## [1] 0.5
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0.1573667
## 
## $winRope
## [1] 0.2193667
## 
## $winRight
## [1] 0.6232667
# Bayesian Correlated Test (correlation parameter 0.1)

bct_tda_pca_5.50.5_svm.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n3_3_fold
## $left
## [1] 0.2989491
## 
## $rope
## [1] 0.1724151
## 
## $right
## [1] 0.5286359
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
#bf_tda_pca_5.50.5_rf.n3_3_fold

#t_test (3 observations, df = 2)
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n3_3_fold)
## t = 0.40542, df = 2, p-value = 0.7244
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1250148  0.1510248
## sample estimates:
##  mean of x 
## 0.01300499
### Test set diff: held-out accuracy of the plain SVM minus the TDA node-3 SVM
diff_tda_pca_5.50.5_svm.n3_test<-(svm_cf_ov_acc - ad_tda_pc_5.50.5_n3_svm_cf0_ov_acc)
diff_tda_pca_5.50.5_svm.n3_test
##   Accuracy 
## 0.05364455
## Bayesian Tests Test set diff

# Bayesian Sign Test (single observation -- prior-dominated)

bst_tda_pca_5.50.5_svm.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n3_test_odds.left<-bst_tda_pca_5.50.5_svm.n3_test$probLeft/bst_tda_pca_5.50.5_svm.n3_test$probRight
bst_tda_pca_5.50.5_svm.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1564333
## 
## $winRight
## [1] 0.8435667
# Bayesian Correlated Test; NA components recorded on this single observation

bct_tda_pca_5.50.5_svm.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot -- disabled. NOTE(review): the commented call has an extra ")".
#plot(rope(diff_tda_pca_5.50.5_svm.n3_test)))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n3_test)) #bf_tda_pca_5.50.5_svm.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n3_test))


##Node4
# Train an RBF-kernel SVM on the TDA PCA-filter node-4 data (3-fold CV,
# selecting on Accuracy); same pipeline as the other node sections.

Adult_TDA_PC_5.50.5_n4_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n4.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
# kernlab warns because some one-hot columns are constant within this node's
# rows and cannot be scaled.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11133, 11134, 11133 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa     
##   0.25  0.9459281  0.03567091
##   0.50  0.9475448  0.09220474
##   1.00  0.9486824  0.12855266
## 
## Tuning parameter 'sigma' was held constant at a value of 0.00954656
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.00954656 and C = 1.
# Per-fold CV Accuracy/Kappa; note the near-zero Kappa on Fold2
Adult_TDA_PC_5.50.5_n4_SvmFit0$resample
##    Accuracy         Kappa Resample
## 1 0.9448437 -0.0003582824    Fold2
## 2 0.9506018  0.1845812798    Fold1
## 3 0.9506018  0.2014349837    Fold3
# Keep only the per-fold Accuracy column for the Bayesian comparisons below
ad_tda_pc_5.50.5_n4_svm_fit_re<-Adult_TDA_PC_5.50.5_n4_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.50.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_PC_5.50.5_n4_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n4_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  2186
##      >50K       3   166
##                                           
##                Accuracy : 0.7759          
##                  95% CI : (0.7675, 0.7841)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 5.336e-05       
##                                           
##                   Kappa : 0.1027          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99960         
##             Specificity : 0.07058         
##          Pos Pred Value : 0.77227         
##          Neg Pred Value : 0.98225         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75891         
##    Detection Prevalence : 0.98270         
##       Balanced Accuracy : 0.53509         
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the confusion matrix is printed twice in the recorded run.
ad_tda_pc_5.50.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7413  2186
##      >50K       3   166
##                                           
##                Accuracy : 0.7759          
##                  95% CI : (0.7675, 0.7841)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 5.336e-05       
##                                           
##                   Kappa : 0.1027          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.99960         
##             Specificity : 0.07058         
##          Pos Pred Value : 0.77227         
##          Neg Pred Value : 0.98225         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75891         
##    Detection Prevalence : 0.98270         
##       Balanced Accuracy : 0.53509         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.759009e-01   1.027270e-01   7.674988e-01   7.841382e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   5.336459e-05   0.000000e+00
# Overall test-set accuracy, reused in the test-set difference below
ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc<-ad_tda_pc_5.50.5_n4_svm_cf0$overall[1]
ad_tda_pc_5.50.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99959547           0.07057823           0.77226794 
##       Neg Pred Value            Precision               Recall 
##           0.98224852           0.77226794           0.99959547 
##                   F1           Prevalence       Detection Rate 
##           0.87134881           0.75921376           0.75890663 
## Detection Prevalence    Balanced Accuracy 
##           0.98269861           0.53508685
# Precision / Recall / F1 for the positive class (elements 5:7 of byClass)
ad_tda_pc_5.50.5_n4_svm_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n4_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted
###### SVM classifiers. NOTE(review): comment originally said "RF" but the
###### code compares the SVM fits.

### 3-fold diff: baseline SVM per-fold accuracy minus TDA node-4 SVM per-fold accuracy

diff_tda_pca_5.50.5_svm_n4_3_fold<-(ad_svm_fit_re - ad_tda_pc_5.50.5_n4_svm_fit_re)
diff_tda_pca_5.50.5_svm_n4_3_fold
##     Accuracy
## 1 -0.1005631
## 2 -0.1965875
## 3 -0.1035367
## Bayesian Tests 3-fold diff

# Bayesian Sign Test, ROPE = [-0.01, 0.01]

bst_tda_pca_5.50.5_svm.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# (Inf here because probRight is 0)

bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left<-bst_tda_pca_5.50.5_svm.n4_3_fold$probLeft/bst_tda_pca_5.50.5_svm.n4_3_fold$probRight
bst_tda_pca_5.50.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0.9919
## 
## $winRope
## [1] 0.0081
## 
## $winRight
## [1] 0
# Bayesian Correlated Test (correlation parameter 0.1)

bct_tda_pca_5.50.5_svm.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n4_3_fold
## $left
## [1] 0.9615457
## 
## $rope
## [1] 0.009110337
## 
## $right
## [1] 0.02934392
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
#bf_tda_pca_5.50.5_rf.n4_3_fold

#t_test (3 observations, df = 2)
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n4_3_fold)
## t = -4.2368, df = 2, p-value = 0.05145
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.269200300  0.002075403
## sample estimates:
##  mean of x 
## -0.1335624
### Test set diff: held-out accuracy of the plain SVM minus the TDA node-4 SVM
diff_tda_pca_5.50.5_svm.n4_test<-(svm_cf_ov_acc - ad_tda_pc_5.50.5_n4_svm_cf0_ov_acc)
diff_tda_pca_5.50.5_svm.n4_test
##   Accuracy 
## 0.07514333
## Bayesian Tests Test set diff

# Bayesian Sign Test (single observation -- prior-dominated)

bst_tda_pca_5.50.5_svm.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n4_test_odds.left<-bst_tda_pca_5.50.5_svm.n4_test$probLeft/bst_tda_pca_5.50.5_svm.n4_test$probRight
bst_tda_pca_5.50.5_svm.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1563333
## 
## $winRight
## [1] 0.8436667
# Bayesian Correlated Test; NA components recorded on this single observation

bct_tda_pca_5.50.5_svm.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot -- disabled. NOTE(review): the commented call has an extra ")".
#plot(rope(diff_tda_pca_5.50.5_svm.n4_test)))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n4_test)) #bf_tda_pca_5.50.5_svm.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n4_test))

##Node5
# Train an RBF-kernel SVM on the TDA PCA-filter node-5 data (3-fold CV,
# selecting on Accuracy); same pipeline as the other node sections.

Adult_TDA_PC_5.50.5_n5_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n5.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
# kernlab warns because some one-hot columns are constant within this node's
# rows and cannot be scaled.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9603, 9603, 9602 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa
##   0.25  0.9979867  0    
##   0.50  0.9979867  0    
##   1.00  0.9979867  0    
## 
## Tuning parameter 'sigma' was held constant at a value of 1.772804e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.772804e-09 and C = 0.25.
# Per-fold CV results. NOTE(review): Kappa = 0 in every fold and the test
# confusion matrix below predicts only ' <=50K' -- the recorded fit behaves
# as a majority-class classifier on this node.
Adult_TDA_PC_5.50.5_n5_SvmFit0$resample
##    Accuracy Kappa Resample
## 1 0.9979175     0    Fold3
## 2 0.9981254     0    Fold2
## 3 0.9979171     0    Fold1
# Keep only the per-fold Accuracy column for the Bayesian comparisons below
ad_tda_pc_5.50.5_n5_svm_fit_re<-Adult_TDA_PC_5.50.5_n5_SvmFit0 $resample[1]

summary(Adult_TDA_PC_5.50.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_PC_5.50.5_n5_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n5_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n5_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the confusion matrix is printed twice in the recorded run.
ad_tda_pc_5.50.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test-set accuracy, reused in the test-set difference below
ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc<-ad_tda_pc_5.50.5_n5_svm_cf0$overall[1]
ad_tda_pc_5.50.5_n5_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision / Recall / F1 for the positive class (elements 5:7 of byClass)
ad_tda_pc_5.50.5_n5_svm_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n5_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted
###### SVM classifiers. NOTE(review): comment originally said "RF" but the
###### code compares the SVM fits.

### 3-fold diff: baseline SVM per-fold accuracy minus TDA node-5 SVM per-fold accuracy

diff_tda_pca_5.50.5_svm_n5_3_fold<-(ad_svm_fit_re - ad_tda_pc_5.50.5_n5_svm_fit_re)
diff_tda_pca_5.50.5_svm_n5_3_fold
##     Accuracy
## 1 -0.1536369
## 2 -0.2441112
## 3 -0.1508521
## Bayesian Tests 3-fold diff

# Bayesian Sign Test, ROPE = [-0.01, 0.01]

bst_tda_pca_5.50.5_svm.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# (Inf here because probRight is 0)

bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_svm.n5_3_fold$probLeft/bst_tda_pca_5.50.5_svm.n5_3_fold$probRight
bst_tda_pca_5.50.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0.9903
## 
## $winRope
## [1] 0.0097
## 
## $winRight
## [1] 0
# Bayesian Correlated Test (correlation parameter 0.1)

bct_tda_pca_5.50.5_svm.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n5_3_fold
## $left
## [1] 0.9802949
## 
## $rope
## [1] 0.003690937
## 
## $right
## [1] 0.01601413
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_svm_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
#bf_tda_pca_5.50.5_rf.n5_3_fold

#t_test (3 observations, df = 2)
t.test(as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_svm_n5_3_fold)
## t = -5.9696, df = 2, p-value = 0.02693
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.31466893 -0.05106451
## sample estimates:
##  mean of x 
## -0.1828667
### Test set diff: held-out accuracy of the plain SVM minus the TDA node-5 SVM
diff_tda_pca_5.50.5_svm.n5_test<-(svm_cf_ov_acc - ad_tda_pc_5.50.5_n5_svm_cf0_ov_acc)
diff_tda_pca_5.50.5_svm.n5_test
##   Accuracy 
## 0.09183047
## Bayesian Tests Test set diff

# Bayesian Sign Test (single observation -- prior-dominated)

bst_tda_pca_5.50.5_svm.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_svm.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_svm.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_svm.n5_test_odds.left<-bst_tda_pca_5.50.5_svm.n5_test$probLeft/bst_tda_pca_5.50.5_svm.n5_test$probRight
bst_tda_pca_5.50.5_svm.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_svm.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_svm.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_svm.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1624667
## 
## $winRight
## [1] 0.8375333
# Bayesian Correlated Test; NA components recorded on this single observation

bct_tda_pca_5.50.5_svm.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_svm.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot -- disabled. NOTE(review): the commented call has an extra ")".
#plot(rope(diff_tda_pca_5.50.5_svm.n5_test)))

#BayesFactor
#bf_tda_pca_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_svm.n5_test)) #bf_tda_pca_5.50.5_svm.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_svm.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1
# Train an RBF-kernel SVM on the TDA KDE-filter node-1 data (3-fold CV,
# selecting on Accuracy); same pipeline as the PCA-filter sections above.


Adult_TDA_KDE_5.50.5_n1_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n1.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
# kernlab warns because some one-hot columns are constant within this node's
# rows and cannot be scaled.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8925, 8925, 8924 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.8061601  0.3585370
##   0.50  0.8086254  0.3697234
##   1.00  0.8111654  0.3890763
## 
## Tuning parameter 'sigma' was held constant at a value of 0.008508323
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.008508323 and C = 1.
# Per-fold CV Accuracy/Kappa; note the much weaker Fold3
Adult_TDA_KDE_5.50.5_n1_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8518602 0.5853196    Fold2
## 2 0.8415509 0.5604742    Fold1
## 3 0.7400851 0.0214351    Fold3
# Keep only the per-fold Accuracy column for the Bayesian comparisons below
ad_tda_kde_5.50.5_n1_svm_fit_re<-Adult_TDA_KDE_5.50.5_n1_SvmFit0 $resample[1]

# Summarize the KDE node-1 SVM fit just trained.
# FIX(review): the original called summary() on Adult_TDA_PC_5.50.5_n1_SvmFit0
# (the PCA-filter node-1 fit from an earlier section) -- a copy-paste slip in
# this KDE section. The recorded output is identical for any ksvm fit.
summary(Adult_TDA_KDE_5.50.5_n1_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n1_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n1_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n1_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n1_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7008  1013
##      >50K     408  1339
##                                           
##                Accuracy : 0.8545          
##                  95% CI : (0.8474, 0.8615)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5638          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9450          
##             Specificity : 0.5693          
##          Pos Pred Value : 0.8737          
##          Neg Pred Value : 0.7665          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7174          
##    Detection Prevalence : 0.8212          
##       Balanced Accuracy : 0.7571          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the confusion matrix is printed twice in the recorded run.
ad_tda_kde_5.50.5_n1_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7008  1013
##      >50K     408  1339
##                                           
##                Accuracy : 0.8545          
##                  95% CI : (0.8474, 0.8615)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5638          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9450          
##             Specificity : 0.5693          
##          Pos Pred Value : 0.8737          
##          Neg Pred Value : 0.7665          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7174          
##    Detection Prevalence : 0.8212          
##       Balanced Accuracy : 0.7571          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n1_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.545250e-01   5.638017e-01   8.473761e-01   8.614617e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.580820e-120   8.849231e-58
# Overall test-set accuracy, reused in the test-set difference below
ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n1_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9449838            0.5693027            0.8737065 
##       Neg Pred Value            Precision               Recall 
##            0.7664568            0.8737065            0.9449838 
##                   F1           Prevalence       Detection Rate 
##            0.9079484            0.7592138            0.7174447 
## Detection Prevalence    Balanced Accuracy 
##            0.8211507            0.7571433
# Precision / Recall / F1 for the positive class (elements 5:7 of byClass)
ad_tda_kde_5.50.5_n1_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted
###### SVM classifiers (comment originally said "RF").

### 3-fold diff: baseline SVM per-fold accuracy minus TDA KDE node-1 SVM
### per-fold accuracy.
# FIX(review): the original subtracted from ad_rf_fit_re (the random-forest
# baseline), unlike every parallel SVM section (n2-n5 all use ad_svm_fit_re)
# and unlike this variable's own "_svm_" name -- a copy-paste slip.
# NOTE(review): the `## ` outputs below were recorded from the erroneous
# RF-baseline run and will change when this chunk is re-knitted.

diff_tda_kde_5.50.5_svm_n1_3_fold<-(ad_svm_fit_re - ad_tda_kde_5.50.5_n1_svm_fit_re)
diff_tda_kde_5.50.5_svm_n1_3_fold
##      Accuracy
## 1 0.005865565
## 2 0.018675501
## 3 0.118148369
## Bayesian Tests 3-fold diff

# Bayesian Sign Test, ROPE = [-0.01, 0.01]

bst_tda_kde_5.50.5_svm.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n1_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n1_3_fold$probRight
bst_tda_kde_5.50.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.2156
## 
## $winRight
## [1] 0.7844
# Bayesian Correlated Test (correlation parameter 0.1)

bct_tda_kde_5.50.5_svm.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n1_3_fold
## $left
## [1] 0.1476268
## 
## $rope
## [1] 0.08040408
## 
## $right
## [1] 0.7719691
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
#bf_tda_kde_5.50.5_svm.n1_3_fold

#t_test (3 observations, df = 2)
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n1_3_fold)
## t = 1.3403, df = 2, p-value = 0.3121
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1051200  0.2002463
## sample estimates:
##  mean of x 
## 0.04756314
### Test set diff
# Single accuracy difference on the held-out test set (plain RF minus
# node-1 TDA-assisted SVM).
diff_tda_kde_5.50.5_svm.n1_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n1_svm_cf0_ov_acc)
diff_tda_kde_5.50.5_svm.n1_test
##    Accuracy 
## 0.002047502
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_svm.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# NaN below is 0/0: both probLeft and probRight are zero for this input.
bst_tda_kde_5.50.5_svm.n1_test_odds.left<-bst_tda_kde_5.50.5_svm.n1_test$probLeft/bst_tda_kde_5.50.5_svm.n1_test$probRight
bst_tda_kde_5.50.5_svm.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# NOTE(review): with a single observation the correlated t-test has no
# spread to estimate, hence the NA results below.
bct_tda_kde_5.50.5_svm.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n1_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n1_test)) #bf_tda_kde_5.50.5_svm.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n1_test))

##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
## (original header said "PCA"; object names in this section use the KDE filter)
##Node2

# Train a radial-kernel SVM (caret, 3-fold CV via fitControl) on the
# node-2 Mapper-vectorized training data.
Adult_TDA_KDE_5.50.5_n2_SvmFit0 <- train(as.factor(adult_df1) ~ ., data =  tda.m_kde_adult_5.50.5.n2.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 12638 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8425, 8425, 8426 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.7672885  0.2266295
##   0.50  0.7695040  0.2462460
##   1.00  0.7684756  0.2480235
## 
## Tuning parameter 'sigma' was held constant at a value of 1.621074e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1.621074e-09 and C = 0.5.
Adult_TDA_KDE_5.50.5_n2_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7728460 0.2619111    Fold1
## 2 0.7616334 0.2103098    Fold3
## 3 0.7740328 0.2665170    Fold2
# Store the 3 per-fold resample accuracies (column 1 of $resample) for the
# RF-vs-SVM fold-difference tests below.
ad_tda_kde_5.50.5_n2_svm_fit_re<-Adult_TDA_KDE_5.50.5_n2_SvmFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n2_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7186  1875
##      >50K     230   477
##                                           
##                Accuracy : 0.7845          
##                  95% CI : (0.7762, 0.7926)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.766e-09       
##                                           
##                   Kappa : 0.2257          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9690          
##             Specificity : 0.2028          
##          Pos Pred Value : 0.7931          
##          Neg Pred Value : 0.6747          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7357          
##    Detection Prevalence : 0.9276          
##       Balanced Accuracy : 0.5859          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print directly above; kept for the
# rendered output, but one of the two could be removed.
ad_tda_kde_5.50.5_n2_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7186  1875
##      >50K     230   477
##                                           
##                Accuracy : 0.7845          
##                  95% CI : (0.7762, 0.7926)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1.766e-09       
##                                           
##                   Kappa : 0.2257          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9690          
##             Specificity : 0.2028          
##          Pos Pred Value : 0.7931          
##          Neg Pred Value : 0.6747          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7357          
##    Detection Prevalence : 0.9276          
##       Balanced Accuracy : 0.5859          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n2_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.845004e-01   2.256840e-01   7.762111e-01   7.926197e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.765672e-09  3.458873e-281
# Extract overall test-set accuracy for node 2.
ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n2_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9689860            0.2028061            0.7930692 
##       Neg Pred Value            Precision               Recall 
##            0.6746818            0.7930692            0.9689860 
##                   F1           Prevalence       Detection Rate 
##            0.8722462            0.7592138            0.7356675 
## Detection Prevalence    Balanced Accuracy 
##            0.9276208            0.5858960
# Keep Precision / Recall / F1 (byClass elements 5:7).
ad_tda_kde_5.50.5_n2_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM classifiers
###### (comparator objects here are the node-2 SVM fit, not an RF)

### 3-fold diff

# Per-fold accuracy differences (plain RF minus node-2 TDA-assisted SVM).
diff_tda_kde_5.50.5_svm_n2_3_fold<-(ad_rf_fit_re - ad_tda_kde_5.50.5_n2_svm_fit_re)
diff_tda_kde_5.50.5_svm_n2_3_fold
##     Accuracy
## 1 0.08487976
## 2 0.09859295
## 3 0.08420076
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE is [-0.01, 0.01] accuracy throughout.
bst_tda_kde_5.50.5_svm.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n2_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n2_3_fold$probRight
bst_tda_kde_5.50.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.007866667
## 
## $winRight
## [1] 0.9921333
# Bayesian Correlated Test

# 0.1 is the assumed between-fold correlation for the correlated t-test.
bct_tda_kde_5.50.5_svm.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n2_3_fold
## $left
## [1] 0.001481744
## 
## $rope
## [1] 0.0008367022
## 
## $right
## [1] 0.9976816
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
#bf_tda_kde_5.50.5_svm.n2_3_fold

#t_test
# Frequentist one-sample t-test on the same fold differences, for reference.
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n2_3_fold)
## t = 19.031, df = 2, p-value = 0.00275
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.06905224 0.10939674
## sample estimates:
##  mean of x 
## 0.08922449
### Test set diff
# Single accuracy difference on the held-out test set (plain RF minus
# node-2 TDA-assisted SVM).
diff_tda_kde_5.50.5_svm.n2_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n2_svm_cf0_ov_acc)
diff_tda_kde_5.50.5_svm.n2_test
##   Accuracy 
## 0.07207207
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_svm.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_svm.n2_test_odds.left<-bst_tda_kde_5.50.5_svm.n2_test$probLeft/bst_tda_kde_5.50.5_svm.n2_test$probRight
bst_tda_kde_5.50.5_svm.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1592
## 
## $winRight
## [1] 0.8408
# Bayesian Correlated Test

# NOTE(review): single observation -> no spread estimate, hence NA below.
bct_tda_kde_5.50.5_svm.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n2_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n2_test)) #bf_tda_kde_5.50.5_svm.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n2_test))

##Node3

# Train a radial-kernel SVM (caret, 3-fold CV via fitControl) on the
# node-3 Mapper-vectorized training data.
Adult_TDA_KDE_5.50.5_n3_SvmFit0 <- train(as.factor(adult_df1) ~ ., data =  tda.m_kde_adult_5.50.5.n3.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7757, 7756, 7755 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.7616464  0.1975413
##   0.50  0.7636233  0.2132360
##   1.00  0.7640530  0.2171511
## 
## Tuning parameter 'sigma' was held constant at a value of 2.398218e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 2.398218e-09 and C = 1.
Adult_TDA_KDE_5.50.5_n3_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7596699 0.2012373    Fold2
## 2 0.7627031 0.2102880    Fold1
## 3 0.7697860 0.2399280    Fold3
# Store the 3 per-fold resample accuracies (column 1 of $resample) for the
# node-3 TDA-assisted SVM, used below in the RF-vs-SVM fold differences.
# (Removed the stray space before `$`, consistent with the n1/n2 blocks.)
ad_tda_kde_5.50.5_n3_svm_fit_re<-Adult_TDA_KDE_5.50.5_n3_SvmFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n3_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n3_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n3_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n3_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6906  1842
##      >50K     510   510
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0.1836          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9312          
##             Specificity : 0.2168          
##          Pos Pred Value : 0.7894          
##          Neg Pred Value : 0.5000          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7070          
##    Detection Prevalence : 0.8956          
##       Balanced Accuracy : 0.5740          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print directly above; kept for the
# rendered output, but one of the two could be removed.
ad_tda_kde_5.50.5_n3_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6906  1842
##      >50K     510   510
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0.1836          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.9312          
##             Specificity : 0.2168          
##          Pos Pred Value : 0.7894          
##          Neg Pred Value : 0.5000          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7070          
##    Detection Prevalence : 0.8956          
##       Balanced Accuracy : 0.5740          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n3_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.592138e-01   1.835590e-01   7.506071e-01   7.676657e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   5.055358e-01  8.024346e-166
# Extract overall test-set accuracy for node 3.
ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n3_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n3_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9312298            0.2168367            0.7894376 
##       Neg Pred Value            Precision               Recall 
##            0.5000000            0.7894376            0.9312298 
##                   F1           Prevalence       Detection Rate 
##            0.8544915            0.7592138            0.7070025 
## Detection Prevalence    Balanced Accuracy 
##            0.8955774            0.5740333
# Keep Precision / Recall / F1 (byClass elements 5:7).
ad_tda_kde_5.50.5_n3_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n3_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM classifiers
###### (comparator objects here are the node-3 SVM fit, not an RF)

### 3-fold diff

# Per-fold accuracy differences (plain RF minus node-3 TDA-assisted SVM).
diff_tda_kde_5.50.5_svm_n3_3_fold<-(ad_rf_fit_re - ad_tda_kde_5.50.5_n3_svm_fit_re)
diff_tda_kde_5.50.5_svm_n3_3_fold
##     Accuracy
## 1 0.09805578
## 2 0.09752325
## 3 0.08844749
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE is [-0.01, 0.01] accuracy throughout.
bst_tda_kde_5.50.5_svm.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n3_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n3_3_fold$probRight
bst_tda_kde_5.50.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009266667
## 
## $winRight
## [1] 0.9907333
# Bayesian Correlated Test

# 0.1 is the assumed between-fold correlation for the correlated t-test.
bct_tda_kde_5.50.5_svm.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n3_3_fold
## $left
## [1] 0.0005903999
## 
## $rope
## [1] 0.0003109946
## 
## $right
## [1] 0.9990986
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
#bf_tda_kde_5.50.5_svm.n3_3_fold

#t_test
# Frequentist one-sample t-test on the same fold differences, for reference.
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n3_3_fold)
## t = 30.366, df = 2, p-value = 0.001083
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.08126068 0.10809033
## sample estimates:
##  mean of x 
## 0.09467551
### Test set diff
# Single accuracy difference on the held-out test set (plain RF minus
# node-3 TDA-assisted SVM).
diff_tda_kde_5.50.5_svm.n3_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n3_svm_cf0_ov_acc)
diff_tda_kde_5.50.5_svm.n3_test
##   Accuracy 
## 0.09735872
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_svm.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_svm.n3_test_odds.left<-bst_tda_kde_5.50.5_svm.n3_test$probLeft/bst_tda_kde_5.50.5_svm.n3_test$probRight
bst_tda_kde_5.50.5_svm.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1556333
## 
## $winRight
## [1] 0.8443667
# Bayesian Correlated Test

# NOTE(review): single observation -> no spread estimate, hence NA below.
bct_tda_kde_5.50.5_svm.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n3_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n3_test)) #bf_tda_kde_5.50.5_svm.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n3_test))

##Node4

# Train a radial-kernel SVM (caret, 3-fold CV via fitControl) on the
# node-4 Mapper-vectorized training data.
Adult_TDA_KDE_5.50.5_n4_SvmFit0 <- train(as.factor(adult_df1) ~ ., data =  tda.m_kde_adult_5.50.5.n4.vec, 
                    method = 'svmRadial', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_KDE_5.50.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6692, 6692, 6692 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.8144053  0.1830632
##   0.50  0.8140068  0.1875297
##   1.00  0.8154015  0.1964493
## 
## Tuning parameter 'sigma' was held constant at a value of 5.623657e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 5.623657e-09 and C = 1.
Adult_TDA_KDE_5.50.5_n4_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8147041 0.1872175    Fold2
## 2 0.8161984 0.2037347    Fold1
## 3 0.8153019 0.1983957    Fold3
# Store the 3 per-fold resample accuracies (column 1 of $resample) for the
# node-4 TDA-assisted SVM, used below in the RF-vs-SVM fold differences.
# (Removed the stray space before `$`, consistent with the n1/n2 blocks.)
ad_tda_kde_5.50.5_n4_svm_fit_re<-Adult_TDA_KDE_5.50.5_n4_SvmFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n4_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n4_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n4_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n4_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6391  1667
##      >50K    1025   685
##                                           
##                Accuracy : 0.7244          
##                  95% CI : (0.7154, 0.7333)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1688          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8618          
##             Specificity : 0.2912          
##          Pos Pred Value : 0.7931          
##          Neg Pred Value : 0.4006          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6543          
##    Detection Prevalence : 0.8249          
##       Balanced Accuracy : 0.5765          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print directly above; kept for the
# rendered output, but one of the two could be removed.
ad_tda_kde_5.50.5_n4_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6391  1667
##      >50K    1025   685
##                                           
##                Accuracy : 0.7244          
##                  95% CI : (0.7154, 0.7333)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.1688          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8618          
##             Specificity : 0.2912          
##          Pos Pred Value : 0.7931          
##          Neg Pred Value : 0.4006          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6543          
##    Detection Prevalence : 0.8249          
##       Balanced Accuracy : 0.5765          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.244062e-01   1.687538e-01   7.154282e-01   7.332502e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00   4.613729e-35
# Extract overall test-set accuracy for node 4.
ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n4_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n4_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8617853            0.2912415            0.7931248 
##       Neg Pred Value            Precision               Recall 
##            0.4005848            0.7931248            0.8617853 
##                   F1           Prevalence       Detection Rate 
##            0.8260308            0.7592138            0.6542793 
## Detection Prevalence    Balanced Accuracy 
##            0.8249386            0.5765134
# Keep Precision / Recall / F1 (byClass elements 5:7).
ad_tda_kde_5.50.5_n4_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n4_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted SVM classifiers
###### (comparator objects here are the node-4 SVM fit, not an RF)

### 3-fold diff

# Per-fold accuracy differences (plain RF minus node-4 TDA-assisted SVM).
diff_tda_kde_5.50.5_svm_n4_3_fold<-(ad_rf_fit_re - ad_tda_kde_5.50.5_n4_svm_fit_re)
diff_tda_kde_5.50.5_svm_n4_3_fold
##     Accuracy
## 1 0.04302159
## 2 0.04402793
## 3 0.04293166
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE is [-0.01, 0.01] accuracy throughout.
bst_tda_kde_5.50.5_svm.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n4_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n4_3_fold$probRight
bst_tda_kde_5.50.5_svm.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.008933333
## 
## $winRight
## [1] 0.9910667
# Bayesian Correlated Test

# 0.1 is the assumed between-fold correlation for the correlated t-test.
bct_tda_kde_5.50.5_svm.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n4_3_fold
## $left
## [1] 2.894449e-05
## 
## $rope
## [1] 4.515383e-05
## 
## $right
## [1] 0.9999259
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_svm_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
#bf_tda_kde_5.50.5_svm.n4_3_fold

#t_test
# Frequentist one-sample t-test on the same fold differences, for reference.
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n4_3_fold)
## t = 123.3, df = 2, p-value = 6.577e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.04181513 0.04483899
## sample estimates:
##  mean of x 
## 0.04332706
### Test set diff
# Single accuracy difference on the held-out test set (plain RF minus
# node-4 TDA-assisted SVM).
diff_tda_kde_5.50.5_svm.n4_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n4_svm_cf0_ov_acc)
diff_tda_kde_5.50.5_svm.n4_test
##  Accuracy 
## 0.1321663
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_svm.n4_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n4_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_svm.n4_test_odds.left<-bst_tda_kde_5.50.5_svm.n4_test$probLeft/bst_tda_kde_5.50.5_svm.n4_test$probRight
bst_tda_kde_5.50.5_svm.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_svm.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1582667
## 
## $winRight
## [1] 0.8417333
# Bayesian Correlated Test

# NOTE(review): single observation -> no spread estimate, hence NA below.
bct_tda_kde_5.50.5_svm.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n4_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n4_test)) #bf_tda_kde_5.50.5_svm.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n4_test))

##Node5

# Fit an RBF-kernel SVM (caret::train, method = "svmRadial") on the TDA/KDE
# mapper node-5 vectorized data, using the shared `fitControl` resampling
# scheme and Accuracy as the selection metric.
# BUG FIX: the original passed `tda.m_kde_adult_5.50.5.n3.vec` (the node-3
# data) here -- an apparent copy-paste slip, since this is the Node5 section
# and every downstream object is named `n5`. NOTE(review): the recorded
# output below was produced with the n3 data; re-run to refresh it, and
# confirm `tda.m_kde_adult_5.50.5.n5.vec` exists upstream.
Adult_TDA_KDE_5.50.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.50.5.n5.vec,
  method = 'svmRadial',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.

## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model: CV accuracy/kappa across the C tuning grid.
Adult_TDA_KDE_5.50.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7757, 7755, 7756 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.25  0.7616475  0.1966785
##   0.50  0.7635383  0.2124213
##   1.00  0.7641398  0.2169347
## 
## Tuning parameter 'sigma' was held constant at a value of 2.217989e-09
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 2.217989e-09 and C = 1.
# Per-fold resampling results for the selected model.
Adult_TDA_KDE_5.50.5_n5_SvmFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7656613 0.2203667    Fold2
## 2 0.7686355 0.2361994    Fold1
## 3 0.7581227 0.1942380    Fold3
# Keep only the per-fold Accuracy column (a 1-column data.frame) for the
# Bayesian comparisons below. (Fixed the stray space before `$` -- R parses
# `x $resample`, but it violates standard spacing conventions.)
ad_tda_kde_5.50.5_n5_svm_fit_re<-Adult_TDA_KDE_5.50.5_n5_SvmFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n5_SvmFit0)
## Length  Class   Mode 
##      1   ksvm     S4
# Predict outcome using Adult_TDA_KDE_5.50.5_n5_SvmFit0 from training data based on testing data
# NOTE(review): `pred0` is reused/overwritten for every model section in this
# file; consider model-specific names to avoid accidental cross-use.
pred0 <- predict(Adult_TDA_KDE_5.50.5_n5_SvmFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n5_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n5_svm_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6913  1845
##      >50K     503   507
##                                          
##                Accuracy : 0.7596         
##                  95% CI : (0.751, 0.7681)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.4678         
##                                          
##                   Kappa : 0.1835         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.9322         
##             Specificity : 0.2156         
##          Pos Pred Value : 0.7893         
##          Neg Pred Value : 0.5020         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7077         
##    Detection Prevalence : 0.8966         
##       Balanced Accuracy : 0.5739         
##                                          
##        'Positive' Class :  <=50K         
## 
# NOTE(review): duplicate print -- this repeats the confusion matrix shown
# immediately above (note the trailing space); consider removing one.
ad_tda_kde_5.50.5_n5_svm_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6913  1845
##      >50K     503   507
##                                          
##                Accuracy : 0.7596         
##                  95% CI : (0.751, 0.7681)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.4678         
##                                          
##                   Kappa : 0.1835         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.9322         
##             Specificity : 0.2156         
##          Pos Pred Value : 0.7893         
##          Neg Pred Value : 0.5020         
##              Prevalence : 0.7592         
##          Detection Rate : 0.7077         
##    Detection Prevalence : 0.8966         
##       Balanced Accuracy : 0.5739         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall statistics vector from caret's confusionMatrix.
ad_tda_kde_5.50.5_n5_svm_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.596233e-01   1.834781e-01   7.510214e-01   7.680701e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   4.678028e-01  1.415959e-168
# Element 1 of `overall` is the test-set Accuracy, used for the
# test-set comparisons below.
ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc<-ad_tda_kde_5.50.5_n5_svm_cf0$overall[1]
ad_tda_kde_5.50.5_n5_svm_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9321737            0.2155612            0.7893355 
##       Neg Pred Value            Precision               Recall 
##            0.5019802            0.7893355            0.9321737 
##                   F1           Prevalence       Detection Rate 
##            0.8548287            0.7592138            0.7077191 
## Detection Prevalence    Balanced Accuracy 
##            0.8966011            0.5738675
# Elements 5:7 of `byClass` are Precision, Recall, and F1 (see output above).
ad_tda_kde_5.50.5_n5_svm_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n5_svm_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of the non-TDA-assisted RF vs. the
###### TDA-assisted SVM classifier (original comment said "RF vs. RF";
###### the comparison here is against the node-5 SVM fit)

### 3-fold diff

# Per-fold accuracy differences: baseline RF resample accuracies minus the
# TDA-KDE node-5 SVM resample accuracies (three values, one per fold).
diff_tda_kde_5.50.5_svm_n5_3_fold<-(ad_rf_fit_re - ad_tda_kde_5.50.5_n5_svm_fit_re)
diff_tda_kde_5.50.5_svm_n5_3_fold
##     Accuracy
## 1 0.09206446
## 2 0.09159083
## 3 0.10011077
## Bayesian Tests 3-fold diff

# Bayesian Sign Test on the three fold differences, ROPE = [-0.01, 0.01].
bst_tda_kde_5.50.5_svm.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n5_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: ratio P(left) / P(right).
# probLeft is 0 (see output above), so the odds are 0.
bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_svm.n5_3_fold$probLeft/bst_tda_kde_5.50.5_svm.n5_3_fold$probRight
bst_tda_kde_5.50.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the three fold differences,
# ROPE = [-0.01, 0.01]; returns win probabilities for left/rope/right.
bsr_tda_kde_5.50.5_svm.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n5_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009333333
## 
## $winRight
## [1] 0.9906667
# Bayesian Correlated t-Test accounting for the overlap between CV folds
# (correlation heuristic rho = 0.1), ROPE = [-0.01, 0.01].
bct_tda_kde_5.50.5_svm.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n5_3_fold
## $left
## [1] 0.0004650956
## 
## $rope
## [1] 0.000245409
## 
## $right
## [1] 0.9992895
# Rope Plot: posterior mass inside/outside the region of practical
# equivalence [-0.01, 0.01] for the node-5 3-fold accuracy differences.
plot(rope(diff_tda_kde_5.50.5_svm_n5_3_fold,c(-0.01,0.01)))

#BayesFactor (kept for reference; intentionally not run)
#bf_tda_kde_5.50.5_svm.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
#bf_tda_kde_5.50.5_svm.n5_3_fold

#t_test: frequentist one-sample t-test on the same three fold differences
# (only 3 observations, so df = 2 -- interpret the interval cautiously)
t.test(as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_svm_n5_3_fold)
## t = 34.216, df = 2, p-value = 0.0008531
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.08269433 0.10648304
## sample estimates:
##  mean of x 
## 0.09458869
### Test set diff
# Held-out test-set accuracy difference: baseline RF overall accuracy minus
# the TDA-KDE node-5 SVM overall accuracy (a single scalar, not per-fold).
diff_tda_kde_5.50.5_svm.n5_test<-(rf_cf_ov_acc-ad_tda_kde_5.50.5_n5_svm_cf0_ov_acc)
diff_tda_kde_5.50.5_svm.n5_test
##   Accuracy 
## 0.09694922
## Bayesian Tests Test set diff

# Bayesian Sign Test on the single test-set difference, ROPE = [-0.01, 0.01].
# NOTE(review): with only one observation the posterior stays close to the
# prior (output below shows 0.5 / 0.5 rope vs. right) -- weak evidence.
bst_tda_kde_5.50.5_svm.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_svm.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_svm.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio P(left) / P(right).
# probLeft is 0 (see output above), so the odds are 0.
bst_tda_kde_5.50.5_svm.n5_test_odds.left<-bst_tda_kde_5.50.5_svm.n5_test$probLeft/bst_tda_kde_5.50.5_svm.n5_test$probRight
bst_tda_kde_5.50.5_svm.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the same single test-set difference,
# ROPE = [-0.01, 0.01]; returns win probabilities for left/rope/right.
bsr_tda_kde_5.50.5_svm.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_svm.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_svm.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1608667
## 
## $winRight
## [1] 0.8391333
# Bayesian Correlated t-Test (correlation heuristic rho = 0.1),
# ROPE = [-0.01, 0.01].
# NOTE(review): all three posteriors print NA below -- presumably because a
# single difference gives an undefined sample variance; confirm whether this
# test is meaningful for test-set (n = 1) comparisons at all.
bct_tda_kde_5.50.5_svm.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_svm.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_svm.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_svm.n5_test))

#BayesFactor
#bf_tda_kde_5.50.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_svm.n5_test)) #bf_tda_kde_5.50.5_svm.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_svm.n5_test))


#Non-TDA-Assisted
#Neural Network 
# Baseline single-hidden-layer neural net (caret::train, method = "nnet") on
# the full one-hot-encoded training set, tuned over nnet's default size/decay
# grid with the shared `fitControl` scheme; Accuracy selects the final model.
adultNn1Fit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train, 
                            method = 'nnet', 
                          trControl = fitControl,
                         metric='Accuracy')
## # weights:  111
## initial  value 12788.553360 
## final  value 8389.119039 
## converged
## # weights:  331
## initial  value 14325.472916 
## iter  10 value 8359.185647
## final  value 8359.178310 
## converged
## # weights:  551
## initial  value 9489.912405 
## iter  10 value 8386.281000
## final  value 8386.271561 
## converged
## # weights:  111
## initial  value 10474.538577 
## iter  10 value 8389.250844
## iter  20 value 7995.059789
## iter  30 value 7994.945135
## iter  40 value 7833.563480
## iter  50 value 7636.693393
## iter  60 value 7608.364754
## iter  70 value 7594.239897
## iter  80 value 7586.275133
## iter  90 value 7531.262699
## iter 100 value 7400.034087
## final  value 7400.034087 
## stopped after 100 iterations
## # weights:  331
## initial  value 8930.514994 
## iter  10 value 7873.640447
## iter  20 value 7727.766957
## iter  30 value 7723.417418
## iter  40 value 7717.619770
## iter  50 value 7709.699631
## iter  60 value 7708.875294
## final  value 7708.650677 
## converged
## # weights:  551
## initial  value 12050.258993 
## iter  10 value 8345.305524
## iter  20 value 7736.724655
## iter  30 value 7710.340866
## iter  40 value 7540.374282
## iter  50 value 7537.557728
## iter  60 value 7520.537764
## iter  70 value 7473.790449
## iter  80 value 7430.396895
## iter  90 value 6924.530326
## iter 100 value 6890.497372
## final  value 6890.497372 
## stopped after 100 iterations
## # weights:  111
## initial  value 11724.156299 
## final  value 8389.120885 
## converged
## # weights:  331
## initial  value 9039.445732 
## iter  10 value 8373.470366
## final  value 8373.467130 
## converged
## # weights:  551
## initial  value 9367.713136 
## iter  10 value 8290.800918
## iter  20 value 8290.365797
## final  value 8290.289906 
## converged
## # weights:  111
## initial  value 9529.357058 
## iter  10 value 8377.748390
## final  value 8377.723891 
## converged
## # weights:  331
## initial  value 8477.836939 
## final  value 8290.395127 
## converged
## # weights:  551
## initial  value 8408.607042 
## iter  10 value 8370.600251
## final  value 8370.595169 
## converged
## # weights:  111
## initial  value 8736.706853 
## iter  10 value 8387.763665
## final  value 8387.761292 
## converged
## # weights:  331
## initial  value 12334.349722 
## iter  10 value 8257.675924
## iter  20 value 8164.908082
## iter  30 value 7743.399507
## iter  40 value 7710.637003
## iter  50 value 7650.701357
## iter  60 value 7595.163592
## iter  70 value 7479.587214
## iter  80 value 7444.988473
## iter  90 value 7413.405359
## iter 100 value 6951.924548
## final  value 6951.924548 
## stopped after 100 iterations
## # weights:  551
## initial  value 11467.833931 
## iter  10 value 7912.798089
## iter  20 value 7789.753807
## iter  30 value 7737.632808
## iter  40 value 7727.114189
## iter  50 value 7679.242047
## iter  60 value 7596.520117
## iter  70 value 7574.744142
## iter  80 value 7540.803785
## iter  90 value 7525.448257
## iter 100 value 7509.157706
## final  value 7509.157706 
## stopped after 100 iterations
## # weights:  111
## initial  value 8378.256321 
## final  value 8346.342190 
## converged
## # weights:  331
## initial  value 8623.254579 
## final  value 8350.632826 
## converged
## # weights:  551
## initial  value 12458.141905 
## final  value 8291.988281 
## converged
## # weights:  111
## initial  value 11497.658504 
## iter  10 value 8321.386017
## final  value 8318.065524 
## converged
## # weights:  331
## initial  value 11183.859705 
## final  value 8387.695367 
## converged
## # weights:  551
## initial  value 10392.733472 
## iter  10 value 8315.720821
## final  value 8302.107472 
## converged
## # weights:  111
## initial  value 10584.054466 
## iter  10 value 8350.473900
## iter  20 value 7765.985722
## iter  30 value 7684.953467
## iter  40 value 7219.247332
## iter  50 value 6585.218147
## iter  60 value 6354.692475
## iter  70 value 6197.477102
## iter  80 value 6048.788172
## iter  90 value 6020.668021
## iter 100 value 5987.635176
## final  value 5987.635176 
## stopped after 100 iterations
## # weights:  331
## initial  value 8479.379275 
## iter  10 value 8297.412402
## iter  20 value 7750.796340
## iter  30 value 7644.881033
## iter  40 value 7634.993192
## iter  50 value 7422.145607
## iter  60 value 7016.882324
## iter  70 value 6733.796618
## iter  80 value 6637.865547
## iter  90 value 6619.706538
## iter 100 value 6491.184702
## final  value 6491.184702 
## stopped after 100 iterations
## # weights:  551
## initial  value 13094.972997 
## iter  10 value 8387.805508
## final  value 8387.789581 
## converged
## # weights:  111
## initial  value 12169.874050 
## final  value 8387.697303 
## converged
## # weights:  331
## initial  value 9547.772622 
## iter  10 value 8382.210042
## iter  20 value 8380.590981
## iter  20 value 8380.590940
## final  value 8380.590386 
## converged
## # weights:  551
## initial  value 9702.014324 
## iter  10 value 8374.895408
## final  value 8374.892144 
## converged
## # weights:  331
## initial  value 16171.839327 
## iter  10 value 12727.268744
## iter  20 value 12579.545478
## iter  30 value 12360.091427
## iter  30 value 12360.091355
## final  value 12360.091355 
## converged
# Print the fitted model: CV accuracy/kappa across the size/decay grid.
adultNn1Fit
## Neural Network 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa      
##   1     0e+00  0.7604966  0.008226790
##   1     1e-04  0.7598824  0.004590314
##   1     1e-01  0.7780914  0.117934510
##   3     0e+00  0.7608476  0.010621859
##   3     1e-04  0.7601457  0.006068214
##   3     1e-01  0.7995434  0.293424323
##   5     0e+00  0.7611985  0.012612517
##   5     1e-04  0.7622078  0.019094493
##   5     1e-01  0.7934467  0.243498889
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Per-fold resampling results for the selected model.
adultNn1Fit$resample
##    Accuracy     Kappa Resample
## 1 0.7925497 0.2258873    Fold1
## 2 0.8098184 0.3938902    Fold2
## 3 0.7962622 0.2604955    Fold3
# Keep only the per-fold Accuracy column for later fold-wise comparisons.
ad_nn1_fit_re<-adultNn1Fit$resample[1]

summary(adultNn1Fit)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     1.10     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -4.78    -0.13     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o 
##  1.73 -4.63  1.73  0.00
#varImp (adultNn1Fit)

# Predict outcome using model from training data based on testing data
predictions <- predict(adultNn1Fit, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
nn1_cf<-confusionMatrix(data=predictions, as.factor(adult.one_hot_df4Test$adult_df1))
nn1_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7414  2285
##      >50K       2    67
##                                           
##                Accuracy : 0.7659          
##                  95% CI : (0.7573, 0.7742)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.06312         
##                                           
##                   Kappa : 0.0422          
##                                           
##  Mcnemar's Test P-Value : < 2e-16         
##                                           
##             Sensitivity : 0.99973         
##             Specificity : 0.02849         
##          Pos Pred Value : 0.76441         
##          Neg Pred Value : 0.97101         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75901         
##    Detection Prevalence : 0.99294         
##       Balanced Accuracy : 0.51411         
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics vector from caret's confusionMatrix.
nn1_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##     0.76586814     0.04220316     0.75734084     0.77423664     0.75921376 
## AccuracyPValue  McnemarPValue 
##     0.06311549     0.00000000
# Element 1 of `overall` is the test-set Accuracy.
nn1_cf_ov_acc<-nn1_cf$overall[1]
nn1_cf$byClass 
##          Sensitivity          Specificity       Pos Pred Value 
##           0.99973031           0.02848639           0.76440870 
##       Neg Pred Value            Precision               Recall 
##           0.97101449           0.76440870           0.99973031 
##                   F1           Prevalence       Detection Rate 
##           0.86637453           0.75921376           0.75900901 
## Detection Prevalence    Balanced Accuracy 
##           0.99293612           0.51410835
# Elements 5:7 of `byClass` are Precision, Recall, and F1 (see output above).
nn1_cf_pre_rec_f1<-nn1_cf$byClass[5:7]

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

#Neural Network 1
# Fit a neural net (caret::train, method = "nnet") on the TDA mapper (PCA
# filter) node-1 vectorized data, with the shared `fitControl` resampling
# scheme; Accuracy selects the final size/decay combination.
Adult_TDA_PC_5.50.5_n1_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n1.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 1433.514756 
## final  value 401.565149 
## converged
## # weights:  331
## initial  value 2157.289911 
## final  value 401.565149 
## converged
## # weights:  551
## initial  value 2600.926981 
## final  value 400.182690 
## converged
## # weights:  111
## initial  value 2325.464640 
## iter  10 value 403.144218
## iter  20 value 402.875146
## iter  30 value 400.672774
## iter  40 value 394.237785
## iter  50 value 394.077794
## iter  60 value 392.550957
## final  value 392.550028 
## converged
## # weights:  331
## initial  value 1785.159913 
## iter  10 value 407.626121
## iter  20 value 398.815097
## iter  30 value 394.884057
## iter  40 value 392.496650
## iter  50 value 391.987659
## final  value 391.984313 
## converged
## # weights:  551
## initial  value 2133.572495 
## iter  10 value 627.558520
## iter  20 value 409.101366
## iter  30 value 398.402020
## iter  40 value 398.290292
## iter  50 value 393.621433
## iter  60 value 393.166413
## iter  70 value 391.687475
## iter  80 value 391.677293
## iter  90 value 391.673981
## iter 100 value 391.551269
## final  value 391.551269 
## stopped after 100 iterations
## # weights:  111
## initial  value 2548.121076 
## iter  10 value 401.589723
## final  value 401.495230 
## converged
## # weights:  331
## initial  value 1520.513350 
## iter  10 value 399.379999
## iter  20 value 399.371704
## iter  20 value 399.371703
## iter  20 value 399.371703
## final  value 399.371703 
## converged
## # weights:  551
## initial  value 1404.782274 
## iter  10 value 399.163566
## iter  20 value 399.118644
## iter  20 value 399.118642
## iter  20 value 399.118642
## final  value 399.118642 
## converged
## # weights:  111
## initial  value 1410.811282 
## iter  10 value 404.628528
## iter  20 value 404.615530
## final  value 404.615507 
## converged
## # weights:  331
## initial  value 1424.368635 
## final  value 405.188657 
## converged
## # weights:  551
## initial  value 2233.268204 
## iter  10 value 405.188657
## iter  10 value 405.188657
## iter  10 value 405.188657
## final  value 405.188657 
## converged
## # weights:  111
## initial  value 1851.383385 
## iter  10 value 406.087580
## iter  20 value 405.878186
## iter  30 value 405.833600
## final  value 405.832583 
## converged
## # weights:  331
## initial  value 1949.264408 
## iter  10 value 429.725878
## iter  20 value 400.326839
## iter  30 value 395.245256
## iter  40 value 389.268638
## iter  50 value 387.952179
## iter  60 value 387.532035
## iter  70 value 387.527307
## iter  80 value 387.167430
## final  value 387.054265 
## converged
## # weights:  551
## initial  value 4056.146122 
## iter  10 value 405.994701
## iter  20 value 405.455628
## iter  30 value 396.167370
## iter  40 value 396.154176
## iter  50 value 396.150897
## iter  60 value 396.149050
## iter  70 value 396.101972
## iter  80 value 396.046879
## iter  90 value 396.024288
## iter 100 value 395.973880
## final  value 395.973880 
## stopped after 100 iterations
## # weights:  111
## initial  value 2851.518161 
## final  value 405.260777 
## converged
## # weights:  331
## initial  value 4850.974678 
## iter  10 value 404.851149
## final  value 404.841118 
## converged
## # weights:  551
## initial  value 3058.124604 
## iter  10 value 402.839806
## iter  20 value 402.807782
## iter  30 value 397.867197
## iter  40 value 395.966942
## iter  50 value 395.404719
## final  value 395.373616 
## converged
## # weights:  111
## initial  value 1057.065473 
## iter  10 value 397.209430
## final  value 397.079618 
## converged
## # weights:  331
## initial  value 2486.540786 
## iter  10 value 401.260696
## iter  20 value 401.214798
## final  value 401.214756 
## converged
## # weights:  551
## initial  value 2108.596103 
## final  value 400.538296 
## converged
## # weights:  111
## initial  value 3532.254073 
## iter  10 value 397.981475
## iter  20 value 394.440169
## iter  30 value 393.349167
## iter  40 value 393.270587
## iter  50 value 391.663427
## iter  60 value 384.541467
## iter  70 value 371.638406
## iter  80 value 361.687354
## iter  90 value 256.939084
## iter 100 value 233.687055
## final  value 233.687055 
## stopped after 100 iterations
## # weights:  331
## initial  value 2336.800165 
## iter  10 value 402.036792
## iter  20 value 395.328606
## iter  30 value 393.887192
## iter  40 value 388.900423
## iter  50 value 372.635580
## iter  60 value 330.468293
## iter  70 value 313.825876
## iter  80 value 256.888057
## iter  90 value 238.726987
## iter 100 value 229.042759
## final  value 229.042759 
## stopped after 100 iterations
## # weights:  551
## initial  value 3652.948225 
## iter  10 value 408.614243
## iter  20 value 394.380756
## iter  30 value 393.077234
## iter  40 value 390.691884
## iter  50 value 389.441149
## iter  60 value 383.445919
## iter  70 value 381.315711
## iter  80 value 376.789414
## iter  90 value 346.029009
## iter 100 value 305.900243
## final  value 305.900243 
## stopped after 100 iterations
## # weights:  111
## initial  value 3520.124367 
## final  value 401.540696 
## converged
## # weights:  331
## initial  value 1733.320568 
## iter  10 value 401.544764
## final  value 401.544755 
## converged
## # weights:  551
## initial  value 1736.721049 
## iter  10 value 400.393356
## iter  20 value 400.387873
## iter  30 value 397.554935
## iter  40 value 396.774029
## iter  50 value 395.575322
## iter  60 value 395.053893
## iter  70 value 394.822481
## iter  80 value 394.744657
## iter  90 value 394.143307
## iter 100 value 393.395083
## final  value 393.395083 
## stopped after 100 iterations
## # weights:  111
## initial  value 2683.872966 
## iter  10 value 605.431356
## iter  20 value 599.665056
## iter  30 value 595.284711
## iter  40 value 591.695562
## iter  50 value 586.061543
## iter  60 value 560.213796
## iter  70 value 487.607495
## iter  80 value 422.202477
## iter  90 value 375.851443
## iter 100 value 356.141733
## final  value 356.141733 
## stopped after 100 iterations
# Print the fitted model: CV accuracy/kappa across the size/decay grid.
Adult_TDA_PC_5.50.5_n1_NN1Fit0
## Neural Network 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3278, 3279, 3277 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   1     0e+00  0.9733579  0.00000000
##   1     1e-04  0.9733579  0.00000000
##   1     1e-01  0.9733579  0.00000000
##   3     0e+00  0.9733579  0.00000000
##   3     1e-04  0.9733579  0.00000000
##   3     1e-01  0.9717318  0.02020914
##   5     0e+00  0.9733579  0.00000000
##   5     1e-04  0.9733579  0.00000000
##   5     1e-01  0.9733579  0.00000000
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 1 and decay = 0.1.
# Per-fold resampling results for the selected model.
# NOTE(review): Kappa is 0 in every fold (and across most of the tuning grid
# above), which suggests the selected net is predicting only the majority
# class on this node's data -- worth verifying before drawing conclusions.
Adult_TDA_PC_5.50.5_n1_NN1Fit0$resample
##    Accuracy Kappa Resample
## 1 0.9731544     0    Fold1
## 2 0.9737485     0    Fold2
## 3 0.9731707     0    Fold3
# Keep only the per-fold Accuracy column for later fold-wise comparisons.
ad_tda_pc_5.50.5_n1_nn1_fit_re<-Adult_TDA_PC_5.50.5_n1_NN1Fit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n1_NN1Fit0)
## a 108-1-1 network with 111 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     3.57    -0.09     1.63     0.09     0.69     0.00     2.49    -1.43 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -0.92     1.02     0.00     0.00     0.24     0.44     0.54     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.39     1.25     0.22    -0.09     0.43    -0.23    -2.00     1.85 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##    -0.69     0.00    -0.85     2.09    -0.23     0.43     0.62     2.45 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.07     0.00     0.00     1.63     1.39     0.04    -0.91 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##    -0.61    -0.74     0.04     0.93     0.37     0.00    -0.62     0.87 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.98     0.86    -0.67     2.76     0.67     0.13     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.01     0.48     0.34     1.82     0.68     0.26     0.01     3.56 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00    -0.08     0.97     0.00     0.42    -1.80     0.12 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     1.29     0.00     0.01     0.35     0.92     0.14     0.91     0.49 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.04     0.01    -1.48    -2.15 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.02     0.63     0.00     0.11     0.02     0.63     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.38     0.06     0.29     0.21     0.34     0.30     1.15 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.11     0.00    -0.94     0.00     0.02 
##  b->o h1->o 
##  0.76  7.32
# Predict outcome using Adult_TDA_PC_5.50.5_n1_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
# NOTE(review): the model predicts ' >50K' for every test row (the ' <=50K'
# prediction row is all zeros), so accuracy (0.2408) is below the
# no-information rate (0.7592) and Kappa is 0
ad_tda_pc_5.50.5_n1_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Redundant second print of the same confusion matrix (kept as in the notebook)
ad_tda_pc_5.50.5_n1_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall summary statistics (accuracy, Kappa, CI bounds, p-values)
ad_tda_pc_5.50.5_n1_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Save the overall test-set accuracy for the later Bayesian comparisons
ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_nn1_cf0$overall[1]
# Per-class statistics; Precision and F1 are NA/NaN because no ' <=50K'
# predictions were made
ad_tda_pc_5.50.5_n1_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.0000000            1.0000000                  NaN 
##       Neg Pred Value            Precision               Recall 
##            0.2407862                   NA            0.0000000 
##                   F1           Prevalence       Detection Rate 
##                   NA            0.7592138            0.0000000 
## Detection Prevalence    Balanced Accuracy 
##            0.0000000            0.5000000
# Extract Precision, Recall and F1 (byClass elements 5:7)
ad_tda_pc_5.50.5_n1_nn1_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
# (header previously said "RF"; these comparisons use the NN1 fits)

### 3-fold diff

# Per-fold accuracy difference: non-TDA NN minus TDA-assisted node-1 NN.
# Negative values mean the TDA-assisted fit had the higher CV accuracy.
diff_tda_pca_5.50.5_nn1_n1_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.50.5_n1_nn1_fit_re)
diff_tda_pca_5.50.5_nn1_n1_3_fold
##     Accuracy
## 1 -0.1806047
## 2 -0.1639301
## 3 -0.1769086
## Bayesian Tests 3-fold diff

# Bayesian Sign Test with ROPE = [-0.01, 0.01]

bst_tda_pca_5.50.5_nn1.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test (Inf here because probRight = 0)

bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_nn1.n1_3_fold$probLeft/bst_tda_pca_5.50.5_nn1.n1_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test (same ROPE)

bsr_tda_pca_5.50.5_nn1.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n1_3_fold
## $winLeft
## [1] 0.9914
## 
## $winRope
## [1] 0.0086
## 
## $winRight
## [1] 0
# Bayesian Correlated t-Test (rho = 0.1, same ROPE)

bct_tda_pca_5.50.5_nn1.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n1_3_fold
## $left
## [1] 0.9993661
## 
## $rope
## [1] 0.0001302352
## 
## $right
## [1] 0.0005036318
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nn1_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold))
#bf_tda_pca_5.50.5_nn1.n1_3_fold

# Frequentist one-sample t-test on the fold differences, for comparison
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n1_3_fold)
## t = -34.378, df = 2, p-value = 0.0008451
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1955688 -0.1520601
## sample estimates:
##  mean of x 
## -0.1738144
### Test set diff
# Single-number difference in test-set accuracy (non-TDA NN minus TDA node-1 NN)
diff_tda_pca_5.50.5_nn1.n1_test<-(nn1_cf_ov_acc - ad_tda_pc_5.50.5_n1_nn1_cf0_ov_acc)
diff_tda_pca_5.50.5_nn1.n1_test
##  Accuracy 
## 0.5250819
## Bayesian Tests Test set diff

# Bayesian Sign Test with ROPE = [-0.01, 0.01] on the single difference

bst_tda_pca_5.50.5_nn1.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nn1.n1_test_odds.left<-bst_tda_pca_5.50.5_nn1.n1_test$probLeft/bst_tda_pca_5.50.5_nn1.n1_test$probRight
bst_tda_pca_5.50.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test (same ROPE)

bsr_tda_pca_5.50.5_nn1.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1585333
## 
## $winRight
## [1] 0.8414667
# Bayesian Correlated t-Test
# NOTE(review): returns NA here - there is only one observation, so the
# sample variance needed by the test is undefined

bct_tda_pca_5.50.5_nn1.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled; fixed the unbalanced parentheses and added the ROPE
# range to match the 3-fold call above)
#plot(rope(diff_tda_pca_5.50.5_nn1.n1_test, c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n1_test)) #bf_tda_pca_5.50.5_nn1.n1_test

# t-test skipped: a one-sample t-test is undefined for a single observation
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Commented-out direct nnet() call kept for reference (stray double comma removed)
##Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec, size=2, range = 0.6, type='class')

#Neural Network 1: caret-tuned nnet on the node-2 TDA training subset,
# tuned over size/decay via fitControl, selecting on Accuracy
Adult_TDA_PC_5.50.5_n2_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 5607.876099 
## final  value 5607.438951 
## converged
## # weights:  331
## initial  value 5735.455529 
## iter  10 value 5598.930746
## final  value 5598.926078 
## converged
## # weights:  551
## initial  value 6047.566074 
## iter  10 value 5579.408092
## final  value 5579.391908 
## converged
## # weights:  111
## initial  value 5800.886111 
## iter  10 value 5607.455321
## iter  20 value 5607.440736
## final  value 5607.440622 
## converged
## # weights:  331
## initial  value 5664.820732 
## iter  10 value 5408.177239
## iter  20 value 5368.399441
## iter  30 value 5352.530152
## iter  40 value 5191.916552
## iter  50 value 5158.965193
## iter  60 value 5154.453854
## iter  70 value 4812.388812
## iter  80 value 4558.216801
## iter  90 value 4462.884365
## iter 100 value 4453.258983
## final  value 4453.258983 
## stopped after 100 iterations
## # weights:  551
## initial  value 6061.381899 
## iter  10 value 5555.188679
## iter  20 value 5518.749362
## iter  30 value 5489.970104
## iter  40 value 5407.626597
## iter  50 value 5361.352233
## iter  60 value 5361.220120
## iter  70 value 5354.887308
## iter  80 value 5350.713310
## iter  90 value 5347.519382
## iter 100 value 5333.995099
## final  value 5333.995099 
## stopped after 100 iterations
## # weights:  111
## initial  value 6252.983273 
## final  value 5607.440681 
## converged
## # weights:  331
## initial  value 7193.459206 
## iter  10 value 5604.425349
## final  value 5604.420229 
## converged
## # weights:  551
## initial  value 5794.790497 
## iter  10 value 5598.949337
## final  value 5598.948174 
## converged
## # weights:  111
## initial  value 5768.462160 
## iter  10 value 5603.365814
## final  value 5603.362899 
## converged
## # weights:  331
## initial  value 6532.325537 
## iter  10 value 5600.943949
## final  value 5600.928895 
## converged
## # weights:  551
## initial  value 5643.169581 
## iter  10 value 5588.126472
## final  value 5588.123342 
## converged
## # weights:  111
## initial  value 6342.187532 
## iter  10 value 5607.623873
## final  value 5607.620006 
## converged
## # weights:  331
## initial  value 5680.388523 
## iter  10 value 5606.390660
## iter  20 value 5334.078519
## iter  30 value 5326.153844
## final  value 5326.149995 
## converged
## # weights:  551
## initial  value 5703.528309 
## iter  10 value 5606.393025
## iter  20 value 5289.121827
## iter  30 value 5230.151453
## iter  40 value 5199.493063
## iter  50 value 5098.579769
## iter  60 value 4886.547088
## iter  70 value 4821.911804
## iter  80 value 4601.041210
## iter  90 value 4536.775843
## iter 100 value 4474.470669
## final  value 4474.470669 
## stopped after 100 iterations
## # weights:  111
## initial  value 5900.730339 
## iter  10 value 5339.267986
## iter  20 value 5301.732672
## iter  30 value 5226.367495
## iter  40 value 5189.797763
## iter  50 value 5185.547922
## iter  60 value 5184.545884
## iter  70 value 5179.334854
## iter  80 value 5179.220936
## iter  90 value 5176.469028
## iter 100 value 5168.203933
## final  value 5168.203933 
## stopped after 100 iterations
## # weights:  331
## initial  value 5721.324085 
## final  value 5607.624093 
## converged
## # weights:  551
## initial  value 5591.099026 
## iter  10 value 5469.068721
## iter  20 value 5465.311718
## iter  30 value 5317.530066
## iter  40 value 5314.474026
## iter  50 value 5314.470131
## iter  50 value 5314.470128
## iter  50 value 5314.470114
## final  value 5314.470114 
## converged
## # weights:  111
## initial  value 5743.963390 
## iter  10 value 5602.757357
## final  value 5602.754555 
## converged
## # weights:  331
## initial  value 5745.832614 
## iter  10 value 5602.146313
## final  value 5602.146098 
## converged
## # weights:  551
## initial  value 5911.465776 
## final  value 5608.225885 
## converged
## # weights:  111
## initial  value 6820.819861 
## iter  10 value 5552.779178
## iter  20 value 5542.708372
## iter  30 value 5542.478992
## iter  40 value 5386.176326
## iter  50 value 5311.508366
## iter  60 value 5309.827601
## iter  70 value 5269.499333
## iter  80 value 5212.111587
## iter  90 value 5160.775942
## iter 100 value 5113.545542
## final  value 5113.545542 
## stopped after 100 iterations
## # weights:  331
## initial  value 6237.839329 
## iter  10 value 5605.614658
## iter  20 value 5605.592911
## iter  30 value 5349.688222
## iter  40 value 5335.192391
## iter  50 value 5332.510359
## iter  60 value 5327.456408
## iter  70 value 5321.007105
## iter  80 value 5306.766060
## iter  90 value 5285.529799
## iter 100 value 5216.449418
## final  value 5216.449418 
## stopped after 100 iterations
## # weights:  551
## initial  value 6484.690170 
## iter  10 value 5372.726873
## iter  20 value 5233.152436
## iter  30 value 5152.795264
## iter  40 value 4993.872333
## iter  50 value 4796.481116
## iter  60 value 4576.495932
## iter  70 value 4541.187994
## iter  80 value 4486.897704
## iter  90 value 4404.901516
## iter 100 value 4310.826482
## final  value 4310.826482 
## stopped after 100 iterations
## # weights:  111
## initial  value 5666.854577 
## final  value 5608.227552 
## converged
## # weights:  331
## initial  value 6021.163120 
## iter  10 value 5607.633517
## final  value 5607.630322 
## converged
## # weights:  551
## initial  value 6226.115771 
## iter  10 value 5602.772949
## final  value 5602.771749 
## converged
## # weights:  551
## initial  value 8490.573899 
## iter  10 value 8341.416186
## iter  20 value 8272.420386
## iter  30 value 7943.233730
## iter  40 value 7805.117832
## iter  50 value 7784.484008
## iter  60 value 7769.156248
## iter  70 value 7760.415003
## iter  80 value 7736.962852
## iter  90 value 7680.897033
## iter 100 value 7625.137745
## final  value 7625.137745 
## stopped after 100 iterations
# Display CV results for the TDA-assisted NN (node 2); caret selected
# size = 5, decay = 0.1
Adult_TDA_PC_5.50.5_n2_NN1Fit0
## Neural Network 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8137, 8137, 8138 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa     
##   1     0e+00  0.5447321  0.00000000
##   1     1e-04  0.5437491  0.04885036
##   1     1e-01  0.5457154  0.05094844
##   3     0e+00  0.5447321  0.00000000
##   3     1e-04  0.5447321  0.00000000
##   3     1e-01  0.6133835  0.20156431
##   5     0e+00  0.5447321  0.00000000
##   5     1e-04  0.5447321  0.00000000
##   5     1e-01  0.6588604  0.29018640
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Per-fold resampling accuracies (note the rows are not in fold order:
# Fold3, Fold1, Fold2)
Adult_TDA_PC_5.50.5_n2_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.7084562 0.4199237    Fold3
## 2 0.5446056 0.0000000    Fold1
## 3 0.7235193 0.4506355    Fold2
# Keep only the Accuracy column (a 1-column data frame) for the later
# Bayesian comparisons against the non-TDA NN fit
ad_tda_pc_5.50.5_n2_nn1_fit_re<-Adult_TDA_PC_5.50.5_n2_NN1Fit0$resample[1]

# Inspect the fitted 108-5-1 network's weights
summary(Adult_TDA_PC_5.50.5_n2_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -0.01     0.00     0.01    -0.01     0.00    -0.01     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.01     0.00     0.01     0.00    -0.01     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.01    -0.01     0.01    -0.01     0.01    -0.01    -0.01 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.01     0.00     0.00     0.01     0.01     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.01    -0.01    -0.01     0.00     0.00     0.00     0.00    -0.01 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00    -0.01     0.00    -0.01     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -0.01     0.00     0.00     0.01     0.00    -0.01     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00    -0.01     0.00    -0.01     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.01 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00    -0.01     0.01     0.00     0.01     0.01     0.00     0.01 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.01     0.00     0.00    -0.01    -0.01     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.01     0.00     0.00    -0.01     0.01     0.01     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.01    -0.01     0.00     0.00     0.01     0.01     0.01 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.01    -0.01     0.01     0.01 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     1.81    -0.01    -0.34    -1.82     2.57     0.00     1.67    -0.47 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.50    -0.30     0.01     0.00     2.02    -0.96     0.03     0.01 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##    -0.80     0.80     0.16    -0.26    -0.53    -0.57    -0.09     3.66 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -1.22    -0.01    -0.12    -0.30     0.34    -0.22     0.00     2.42 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00    -0.13    -0.09    -0.16    -0.34    -0.92     0.00     1.64 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -2.25     1.45    -0.11     1.87     0.15     0.00     0.23     0.12 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.38    -0.41     0.03     2.80    -0.37     0.00     0.16    -0.29 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.49     0.40     0.03    -0.34    -0.44     2.16    -0.79     2.60 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00    -0.06    -0.11     0.01     2.39     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00    -0.01    -0.01     0.00    -0.16     0.87    -0.20 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00    -0.01    -0.01     0.00     0.00     0.01     0.03 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.03     0.00    -0.01     0.00     0.25    -0.01     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.01    -0.02     0.00    -0.04    -0.56     0.00     0.05    -0.03 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00    -0.01    -0.70     0.04     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##    -0.01     0.00     0.00     0.01    -0.01     0.01     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00    -0.01     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##    -0.01    -0.01     0.01     0.00     0.01     0.00     0.00     0.01 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00    -0.01     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.01    -0.01     0.00    -0.01     0.01     0.00     0.00     0.01 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.01     0.00     0.01    -0.01     0.00     0.01     0.01     0.01 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00    -0.01     0.01     0.01     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.01     0.00     0.00     0.01     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.02     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.01     0.01     0.01     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00    -0.01     0.01     0.00     0.00     0.00     0.01    -0.01 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00    -0.01     0.00     0.00    -0.01 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.01     0.00     0.00     0.01     0.00     0.00    -0.01     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##    -0.01     0.01     0.01     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.00     0.01    -0.01    -0.01     0.01     0.01    -0.01     0.01 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##    -0.01     0.00     0.00     0.11    -0.01     0.00    -0.01     0.01 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.01     0.00     0.00    -0.01     0.00    -0.01     0.01 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00    -0.01     0.00     0.00     0.00    -0.01    -0.01     0.00 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##    -0.01     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.00     0.00     0.00     0.00     0.01    -0.01     0.00    -0.01 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00    -0.01 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##    -0.01     0.00     0.00     0.00     0.00     0.00     0.01    -0.01 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.01     0.00     0.00     0.01     0.00    -0.01     0.01     0.01 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00    -0.01    -0.01     0.00     0.00     0.01 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00    -0.01     0.00     0.00     0.00     0.00     0.01     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00    -0.01     0.01     0.00     0.01     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.01     0.00    -0.01     0.01     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##    -0.01     0.01     0.00     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00    -0.01     0.00    -0.01     0.00     0.00    -0.01     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00    -0.01    -0.18     0.00     0.01     0.01     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.01     0.01     0.00     0.00     0.00    -0.01     0.01 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00    -0.01     0.00     0.00     0.00     0.00    -0.01    -0.01 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.01     0.01     0.01     0.00     0.00     0.00    -0.01    -0.01 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.01     0.00     0.00    -0.01     0.01    -0.01 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00    -0.01 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00    -0.01 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.04     0.01     0.00     0.00     0.00     0.01    -0.01     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##    -0.01    -0.01     0.01     0.00     0.01     0.00     0.01    -0.01 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.01     0.00    -0.01     0.00     0.00    -0.01    -0.01    -0.01 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##    -0.01    -0.01     0.00     0.00     0.00     0.00     0.01     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##    -0.01     0.00     0.00    -0.01    -0.01     0.00    -0.01    -0.01 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##    -0.01     0.00     0.00    -0.01    -0.01 
##  b->o h1->o h2->o h3->o h4->o h5->o 
##  2.60  0.29 -4.91  1.66  0.28  0.56
# Predict outcome using Adult_TDA_PC_5.50.5_n2_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
# NOTE(review): test accuracy (0.5041) is below the no-information rate
# (0.7592) and Kappa is negative, i.e. worse than always guessing ' <=50K'
ad_tda_pc_5.50.5_n2_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3987  1415
##      >50K    3429   937
##                                           
##                Accuracy : 0.5041          
##                  95% CI : (0.4941, 0.5141)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0495         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5376          
##             Specificity : 0.3984          
##          Pos Pred Value : 0.7381          
##          Neg Pred Value : 0.2146          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4082          
##    Detection Prevalence : 0.5530          
##       Balanced Accuracy : 0.4680          
##                                           
##        'Positive' Class :  <=50K          
## 
# Redundant second print of the same confusion matrix (kept as in the notebook)
ad_tda_pc_5.50.5_n2_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3987  1415
##      >50K    3429   937
##                                           
##                Accuracy : 0.5041          
##                  95% CI : (0.4941, 0.5141)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0495         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.5376          
##             Specificity : 0.3984          
##          Pos Pred Value : 0.7381          
##          Neg Pred Value : 0.2146          
##              Prevalence : 0.7592          
##          Detection Rate : 0.4082          
##    Detection Prevalence : 0.5530          
##       Balanced Accuracy : 0.4680          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall summary statistics (accuracy, Kappa, CI bounds, p-values)
ad_tda_pc_5.50.5_n2_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.040950e-01  -4.951743e-02   4.941287e-01   5.140589e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  6.154697e-184
# Save the overall test-set accuracy for the later Bayesian comparisons
ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_nn1_cf0$overall[1]
# Per-class statistics for the positive class ' <=50K'
ad_tda_pc_5.50.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.5376214            0.3983844            0.7380600 
##       Neg Pred Value            Precision               Recall 
##            0.2146129            0.7380600            0.5376214 
##                   F1           Prevalence       Detection Rate 
##            0.6220939            0.7592138            0.4081695 
## Detection Prevalence    Balanced Accuracy 
##            0.5530303            0.4680029
# Extract Precision, Recall and F1 (byClass elements 5:7)
ad_tda_pc_5.50.5_n2_nn1_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
# (header previously said "RF"; these comparisons use the NN1 fits)

### 3-fold diff

# Per-fold accuracy difference: non-TDA NN minus TDA-assisted node-2 NN.
# Positive values mean the non-TDA fit had the higher CV accuracy.
diff_tda_pca_5.50.5_nn1_n2_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.50.5_n2_nn1_fit_re)
diff_tda_pca_5.50.5_nn1_n2_3_fold
##     Accuracy
## 1 0.08409345
## 2 0.26521282
## 3 0.07274288
## Bayesian Tests 3-fold diff

# Bayesian Sign Test with ROPE = [-0.01, 0.01]

bst_tda_pca_5.50.5_nn1.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test (0 here because probLeft = 0)

bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_nn1.n2_3_fold$probLeft/bst_tda_pca_5.50.5_nn1.n2_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test (same ROPE)

bsr_tda_pca_5.50.5_nn1.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0088
## 
## $winRight
## [1] 0.9912
# Bayesian Correlated t-Test (rho = 0.1, same ROPE)

bct_tda_pca_5.50.5_nn1.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n2_3_fold
## $left
## [1] 0.08571311
## 
## $rope
## [1] 0.01986901
## 
## $right
## [1] 0.8944179
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nn1_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
#bf_tda_pca_5.50.5_nn1.n2_3_fold

# Frequentist one-sample t-test on the fold differences, for comparison
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n2_3_fold)
## t = 2.2563, df = 2, p-value = 0.1527
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1275918  0.4089579
## sample estimates:
## mean of x 
##  0.140683
### Test set diff
# Single scalar difference of held-out-test accuracies (baseline minus Node2 TDA model).
diff_tda_pca_5.50.5_nn1.n2_test<-(nn1_cf_ov_acc - ad_tda_pc_5.50.5_n2_nn1_cf0_ov_acc)
diff_tda_pca_5.50.5_nn1.n2_test
##  Accuracy 
## 0.2617731
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nn1.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nn1.n2_test_odds.left<-bst_tda_pca_5.50.5_nn1.n2_test$probLeft/bst_tda_pca_5.50.5_nn1.n2_test$probRight
bst_tda_pca_5.50.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nn1.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1603667
## 
## $winRight
## [1] 0.8396333
# Bayesian Correlated Test
# NOTE(review): with a single difference value the sample sd is undefined, which is
# presumably why all three posterior probabilities below are NA — verify against the
# correlatedBayesianTtest implementation.

bct_tda_pca_5.50.5_nn1.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# (disabled; original commented line had unbalanced parentheses and lacked the ROPE range)
#plot(rope(diff_tda_pca_5.50.5_nn1.n2_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n2_test)) #bf_tda_pca_5.50.5_nn1.n2_test

#t_test
# (not meaningful for a single observation)
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n2_test))


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node3

#Neural Network 1
# caret::train of an nnet classifier on the Mapper-Node3 subset
# (tda.m_adult_5.50.5.n3.vec), predicting the income label adult_df1.
# fitControl (defined earlier in the document) supplies 3-fold CV;
# model selection is by Accuracy over nnet's default size/decay grid.
Adult_TDA_PC_5.50.5_n3_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n3.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 5098.560007 
## final  value 4744.174576 
## converged
## # weights:  331
## initial  value 10279.912483 
## iter  10 value 4702.006919
## iter  10 value 4702.006918
## iter  10 value 4702.006918
## final  value 4702.006918 
## converged
## # weights:  551
## initial  value 6147.464370 
## iter  10 value 4732.360697
## final  value 4732.352720 
## converged
## # weights:  111
## initial  value 4775.759254 
## iter  10 value 4744.249140
## final  value 4744.248602 
## converged
## # weights:  331
## initial  value 7459.434280 
## iter  10 value 4742.065875
## iter  20 value 4507.886953
## iter  30 value 4407.197729
## iter  40 value 4327.669257
## iter  50 value 4319.892598
## iter  60 value 4319.054971
## iter  70 value 4304.831330
## iter  80 value 4302.469138
## iter  90 value 4279.163303
## iter 100 value 4212.279949
## final  value 4212.279949 
## stopped after 100 iterations
## # weights:  551
## initial  value 6660.420339 
## iter  10 value 4586.284295
## iter  20 value 4481.259731
## iter  30 value 4476.572315
## final  value 4475.593432 
## converged
## # weights:  111
## initial  value 7124.668910 
## final  value 4744.176417 
## converged
## # weights:  331
## initial  value 5876.750639 
## final  value 4744.180119 
## converged
## # weights:  551
## initial  value 6567.419229 
## iter  10 value 4733.650317
## final  value 4733.643072 
## converged
## # weights:  111
## initial  value 4861.507422 
## iter  10 value 4699.291245
## iter  20 value 4622.018416
## iter  30 value 4577.678184
## iter  40 value 4569.768952
## final  value 4569.758382 
## converged
## # weights:  331
## initial  value 9759.526518 
## iter  10 value 4743.927264
## final  value 4743.915183 
## converged
## # weights:  551
## initial  value 9608.505775 
## iter  10 value 4729.186968
## iter  20 value 4729.135119
## iter  20 value 4729.135094
## iter  20 value 4729.135093
## final  value 4729.135093 
## converged
## # weights:  111
## initial  value 5520.586253 
## iter  10 value 4745.538870
## iter  10 value 4745.538829
## iter  10 value 4745.538802
## final  value 4745.538802 
## converged
## # weights:  331
## initial  value 9744.835276 
## iter  10 value 4746.578731
## iter  20 value 4488.131741
## iter  30 value 4460.042448
## iter  40 value 4429.883635
## iter  50 value 4400.286414
## iter  60 value 4390.982274
## iter  70 value 4307.548500
## iter  80 value 4162.284183
## iter  90 value 4037.347059
## iter 100 value 3978.169086
## final  value 3978.169086 
## stopped after 100 iterations
## # weights:  551
## initial  value 8102.902366 
## iter  10 value 4561.385980
## iter  20 value 4473.689575
## iter  30 value 4462.451517
## iter  40 value 4434.295631
## iter  50 value 4375.103599
## iter  60 value 4370.194879
## iter  70 value 4353.327675
## iter  80 value 4314.975737
## iter  90 value 4267.033014
## iter 100 value 4263.546381
## final  value 4263.546381 
## stopped after 100 iterations
## # weights:  111
## initial  value 5705.745688 
## iter  10 value 4743.930823
## final  value 4743.924418 
## converged
## # weights:  331
## initial  value 14527.338073 
## final  value 4721.751065 
## converged
## # weights:  551
## initial  value 5098.383135 
## iter  10 value 4733.603319
## final  value 4733.597075 
## converged
## # weights:  111
## initial  value 7578.269479 
## iter  10 value 4718.179799
## final  value 4718.079635 
## converged
## # weights:  331
## initial  value 4897.274613 
## final  value 4680.851382 
## converged
## # weights:  551
## initial  value 7314.215011 
## iter  10 value 4671.762280
## iter  20 value 4510.622311
## iter  30 value 4507.095786
## final  value 4507.088580 
## converged
## # weights:  111
## initial  value 5733.004515 
## iter  10 value 4744.063181
## iter  10 value 4744.063138
## final  value 4744.063138 
## converged
## # weights:  331
## initial  value 5465.634747 
## iter  10 value 4743.995910
## iter  20 value 4658.882993
## iter  30 value 4451.158372
## iter  40 value 4434.729615
## iter  50 value 4432.704549
## iter  60 value 4431.813387
## iter  70 value 4424.233026
## iter  80 value 4423.631000
## iter  90 value 4422.994570
## iter  90 value 4422.994538
## iter  90 value 4422.994538
## final  value 4422.994538 
## converged
## # weights:  551
## initial  value 5369.740792 
## iter  10 value 4737.722875
## iter  20 value 4712.693915
## iter  30 value 4470.303671
## iter  40 value 4448.827229
## iter  50 value 4401.422600
## iter  60 value 4324.161594
## iter  70 value 4305.408976
## iter  80 value 4278.118413
## iter  90 value 4265.178554
## iter 100 value 4148.410863
## final  value 4148.410863 
## stopped after 100 iterations
## # weights:  111
## initial  value 6699.315874 
## final  value 4743.917018 
## converged
## # weights:  331
## initial  value 7469.061671 
## final  value 4743.921179 
## converged
## # weights:  551
## initial  value 6716.719896 
## iter  10 value 4722.340581
## final  value 4722.325140 
## converged
## # weights:  551
## initial  value 8764.205748 
## iter  10 value 6939.221704
## iter  20 value 6832.458704
## iter  30 value 6261.688571
## iter  40 value 6188.278370
## iter  50 value 5996.458226
## iter  60 value 5952.761569
## iter  70 value 5714.106166
## iter  80 value 5590.169200
## iter  90 value 5478.799841
## iter 100 value 5437.122525
## final  value 5437.122525 
## stopped after 100 iterations
# Print the fitted caret object: CV summary and the selected tuning parameters.
Adult_TDA_PC_5.50.5_n3_NN1Fit0
## Neural Network 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8827, 8826 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa      
##   1     0e+00  0.7760575  0.034914825
##   1     1e-04  0.7715257  0.000510032
##   1     1e-01  0.7714502  0.000000000
##   3     0e+00  0.7754530  0.028337449
##   3     1e-04  0.7719789  0.003912412
##   3     1e-01  0.8022666  0.217339741
##   5     0e+00  0.7776427  0.051308166
##   5     1e-04  0.7728851  0.009644625
##   5     1e-01  0.8027944  0.223810663
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Per-fold resample metrics for the winning model.
Adult_TDA_PC_5.50.5_n3_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8049388 0.2576969    Fold3
## 2 0.7967369 0.1862816    Fold1
## 3 0.8067075 0.2274535    Fold2
# Keep only the Accuracy column (1-col data frame) for the later fold-wise comparisons.
ad_tda_pc_5.50.5_n3_nn1_fit_re<-Adult_TDA_PC_5.50.5_n3_NN1Fit0$resample[1]

# Dump the final network's weights (108-5-1 nnet; output shown below).
summary(Adult_TDA_PC_5.50.5_n3_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.01     0.01     0.00     0.00     0.02     0.01     0.01    -0.01 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00    -0.01     0.00     0.27     0.01    -0.01    -0.01     0.01 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##    -0.01    -0.01     0.00    -0.01     0.01     0.00    -0.01    -0.01 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##    -0.01    -0.01     0.00     0.00    -0.01     0.01     0.00    -0.01 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.01     0.00     0.00     0.00     0.01     0.02     0.01     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.01     0.01    -0.01    -0.01     0.00     0.00     0.00     0.01 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00    -0.02     0.01    -0.01    -0.01     0.01     0.00    -0.01 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.02     0.02     0.01     0.00     0.01     0.02     0.01 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.06     0.00     0.00     0.00     0.00    -0.01    -0.02     0.02 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.01    -0.02     0.02    -0.01    -0.01    -0.02    -0.02     0.01 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##    -0.01     0.01     0.01     0.01     0.00     0.01    -0.01    -0.01 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.01     0.00     0.01    -0.01    -0.01     0.02     0.00     0.02 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.01     0.00     0.01    -0.01     0.00     0.01    -0.01     0.01 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.01    -0.01     0.01    -0.01 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##    -0.01     0.01    -0.01    -0.01    -0.01     0.02     0.01     0.01 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.01    -0.01     0.00     0.00    -0.01     0.01     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.01    -0.01     0.01    -0.01     0.00    -0.01     0.00     0.01 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -0.01     0.02     0.00    -0.01    -0.01     0.02     0.00    -0.01 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.01     0.00    -0.02     0.01     0.01     0.01     0.01    -0.01 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00    -0.01     0.01     0.01    -0.01     0.01    -0.01    -0.01 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.01     0.00     0.00     0.01     0.00    -0.01     0.01     0.02 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.02     0.00     0.02     0.00    -0.01     0.01     0.01     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.02     0.00     0.00     0.01     0.02     0.02     0.01 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.01    -0.01     0.01    -0.01     0.01     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.01    -0.01     0.00     0.00     0.02     0.01     0.01 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.01     0.02    -0.01     0.01    -0.01     0.01    -0.01    -0.01 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.01    -0.01     0.02     0.02     0.01     0.01    -0.02     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00    -0.01     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.04     4.33     0.02    -0.02     0.00     0.01     0.03     0.01 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.04     0.01    -0.01     0.02    -0.01    -0.01    -0.05     0.01 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00    -0.03    -0.01    -0.02     0.03    -0.10     0.01     0.05 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##    -0.01     0.01     0.13     0.01     0.82     0.00    -0.02     0.14 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.01    -0.01    -0.09     0.01     0.02     0.00    -0.02    -0.04 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.04     0.04     0.01    -0.01    -0.02     0.02    -0.01     0.05 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.01    -0.02    -0.01     0.14     0.00     0.01     0.01    -0.11 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00    -0.01    -0.03     0.01     0.06    -0.08     0.11 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##    -0.57     0.00    -0.18    -0.02     0.01     0.02     0.02     0.01 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00    -0.01     0.00    -0.01    -0.01     0.02     0.02    -0.02 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##    -0.01     0.01    -0.01     0.02     0.01     0.01     0.00    -0.01 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##    -0.01    -0.01     0.02     0.01     0.01    -0.01     0.02     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.01     0.01    -0.01     0.00     0.01     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.01     0.10    -0.02     0.02 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     1.28    -0.01    -0.69    -1.07     0.58     0.00    -1.01     1.49 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.94     0.38     0.60     0.00     0.06    -0.43    -1.45     0.45 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.21     1.60    -0.26     0.44    -0.01     0.85    -0.25    -0.21 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.94     0.61    -0.68    -0.65    -0.13     0.78     0.37    -0.53 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.20     0.23     0.49    -0.30    -0.67    -1.69     1.05    -0.10 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##    -0.14     5.35    -0.27    -0.74     0.02     0.40     0.00    -0.32 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##    -0.90    -1.33     0.59     2.12     0.06     1.19     1.01    -0.26 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##    -2.88     0.53     0.00    -0.31    -0.09     1.14    -0.77     2.03 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.00     0.00    -0.01    -0.45    -2.11     0.10     2.02     1.54 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##    -2.33     1.11    -0.01     0.15    -2.43    -0.43    -1.93     0.10 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.23    -0.86     0.02     0.00     1.18     0.46     1.96     0.95 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00    -0.36    -1.56    -0.27     1.03     1.32     1.04     0.10 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##    -0.38    -1.15     1.33     0.67    -0.92    -0.36    -0.53     1.08 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.13     0.65    -0.92     1.43    -0.40 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##    -0.01     0.01     0.01    -0.01    -0.01     0.00     0.01     0.01 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##    -0.01     0.00     0.00     0.03     0.00     0.00     0.02     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##    -0.01    -0.02    -0.01     0.00     0.01     0.01     0.00    -0.01 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.01     0.00     0.01    -0.01     0.00    -0.01     0.02     0.01 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##    -0.01    -0.01    -0.01    -0.01     0.01     0.00    -0.02    -0.01 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.01     0.00     0.02    -0.01     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.01     0.00     0.00     0.01     0.01     0.01     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##    -0.01     0.01     0.01     0.00    -0.01    -0.02    -0.02     0.01 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00    -0.01     0.00    -0.01    -0.01     0.01     0.01     0.02 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##    -0.01     0.00    -0.01     0.00    -0.02     0.01     0.00    -0.01 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.02    -0.02    -0.02     0.00    -0.01 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.01    -0.01     0.02     0.00    -0.02    -0.01     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##    -0.02    -0.01     0.00     0.00    -0.01    -0.01    -0.01     0.02 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.02     0.01     0.01 
##  b->o h1->o h2->o h3->o h4->o h5->o 
##  0.56  0.54  0.51 -1.56 -3.39  0.52
# Predict outcome using Adult_TDA_PC_5.50.5_n3_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
# (reference labels coerced to factor to match the factor predictions)
ad_tda_pc_5.50.5_n3_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4912  1711
##      >50K    2504   641
##                                           
##                Accuracy : 0.5685          
##                  95% CI : (0.5586, 0.5783)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0584         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6624          
##             Specificity : 0.2725          
##          Pos Pred Value : 0.7417          
##          Neg Pred Value : 0.2038          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5029          
##    Detection Prevalence : 0.6780          
##       Balanced Accuracy : 0.4674          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print — the same confusion matrix was already printed above.
ad_tda_pc_5.50.5_n3_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   4912  1711
##      >50K    2504   641
##                                           
##                Accuracy : 0.5685          
##                  95% CI : (0.5586, 0.5783)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0584         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6624          
##             Specificity : 0.2725          
##          Pos Pred Value : 0.7417          
##          Neg Pred Value : 0.2038          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5029          
##    Detection Prevalence : 0.6780          
##       Balanced Accuracy : 0.4674          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics vector; element 1 is Accuracy.
ad_tda_pc_5.50.5_n3_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.684889e-01  -5.839249e-02   5.585965e-01   5.783405e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00   3.144301e-34
# Keep overall test-set accuracy for the test-set difference below.
ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc<-ad_tda_pc_5.50.5_n3_nn1_cf0$overall[1]
ad_tda_pc_5.50.5_n3_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6623517            0.2725340            0.7416579 
##       Neg Pred Value            Precision               Recall 
##            0.2038156            0.7416579            0.6623517 
##                   F1           Prevalence       Detection Rate 
##            0.6997649            0.7592138            0.5028665 
## Detection Prevalence    Balanced Accuracy 
##            0.6780303            0.4674428
# Keep Precision / Recall / F1 (byClass positions 5:7).
ad_tda_pc_5.50.5_n3_nn1_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n3_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
###### NOTE(review): as above, these are NN1 (nnet) models, not RFs — header likely copied.

### 3-fold diff

# Per-fold accuracy difference: baseline NN1 minus Node3 TDA-assisted NN1.
diff_tda_pca_5.50.5_nn1_n3_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.50.5_n3_nn1_fit_re)
diff_tda_pca_5.50.5_nn1_n3_3_fold
##      Accuracy
## 1 -0.01238914
## 2  0.01308146
## 3 -0.01044528
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# Same ROPE of [-0.01, 0.01] accuracy as the Node2 comparison above.

bst_tda_pca_5.50.5_nn1.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 
# Odds that the TDA-assisted model wins (probLeft) vs. the baseline winning (probRight).

bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_nn1.n3_3_fold$probLeft/bst_tda_pca_5.50.5_nn1.n3_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n3_3_fold_odds.left
## [1] 2
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nn1.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0.3597
## 
## $winRope
## [1] 0.5833333
## 
## $winRight
## [1] 0.05696667
# Bayesian Correlated Test
# 0.1 = assumed fold correlation; remaining args are the ROPE bounds.

bct_tda_pca_5.50.5_nn1.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.2746427
## 
## $rope
## [1] 0.5773687
## 
## $right
## [1] 0.1479887
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nn1_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold))
#bf_tda_pca_5.50.5_nn1.n3_3_fold

#t_test
# Frequentist one-sample t-test of the fold differences against 0 (n = 3 folds).
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n3_3_fold)
## t = -0.39717, df = 2, p-value = 0.7296
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03847027  0.03196829
## sample estimates:
##    mean of x 
## -0.003250987
### Test set diff
# Single scalar difference of held-out-test accuracies (baseline minus Node3 TDA model).
diff_tda_pca_5.50.5_nn1.n3_test<-(nn1_cf_ov_acc - ad_tda_pc_5.50.5_n3_nn1_cf0_ov_acc)
diff_tda_pca_5.50.5_nn1.n3_test
##  Accuracy 
## 0.1973792
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nn1.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nn1.n3_test_odds.left<-bst_tda_pca_5.50.5_nn1.n3_test$probLeft/bst_tda_pca_5.50.5_nn1.n3_test$probRight
bst_tda_pca_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nn1.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1615
## 
## $winRight
## [1] 0.8385
# Bayesian Correlated Test
# NOTE(review): NA output below is presumably because a single observation has no
# sample sd — verify against the correlatedBayesianTtest implementation.

bct_tda_pca_5.50.5_nn1.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# (disabled; original commented line had unbalanced parentheses and lacked the ROPE range)
#plot(rope(diff_tda_pca_5.50.5_nn1.n3_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n3_test)) #bf_tda_pca_5.50.5_nn1.n3_test

#t_test
# (not meaningful for a single observation)
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n3_test))


##Node4

#Neural Network 1
# Same pipeline as Node3: caret::train of an nnet classifier on the Mapper-Node4
# subset (tda.m_adult_5.50.5.n4.vec) with 3-fold CV via fitControl, selecting by Accuracy.
Adult_TDA_PC_5.50.5_n4_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n4.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 10215.062025 
## final  value 2373.078436 
## converged
## # weights:  331
## initial  value 8912.050726 
## final  value 2366.566547 
## converged
## # weights:  551
## initial  value 12960.931372 
## iter  10 value 2400.881216
## final  value 2344.220816 
## converged
## # weights:  111
## initial  value 8102.117664 
## iter  10 value 2373.482839
## final  value 2373.482405 
## converged
## # weights:  331
## initial  value 8995.269604 
## iter  10 value 2369.379529
## iter  20 value 2366.744281
## iter  30 value 2353.317434
## iter  40 value 2270.525432
## iter  50 value 2177.641070
## iter  60 value 2173.809183
## iter  70 value 2161.971961
## iter  80 value 2127.497955
## iter  90 value 2085.234347
## iter 100 value 2053.745643
## final  value 2053.745643 
## stopped after 100 iterations
## # weights:  551
## initial  value 11083.858508 
## iter  10 value 2405.629299
## iter  20 value 2377.031029
## iter  30 value 2373.345990
## iter  40 value 2366.816854
## iter  50 value 2068.084212
## iter  60 value 1938.130453
## iter  70 value 1887.453259
## iter  80 value 1856.430397
## iter  90 value 1855.091535
## iter 100 value 1850.825584
## final  value 1850.825584 
## stopped after 100 iterations
## # weights:  111
## initial  value 6560.353151 
## iter  10 value 2366.618808
## final  value 2366.570656 
## converged
## # weights:  331
## initial  value 6868.961704 
## final  value 2373.084225 
## converged
## # weights:  551
## initial  value 12544.544610 
## final  value 2373.087769 
## converged
## # weights:  111
## initial  value 5231.486883 
## final  value 2375.976970 
## converged
## # weights:  331
## initial  value 4165.172804 
## final  value 2375.976970 
## converged
## # weights:  551
## initial  value 10210.941720 
## final  value 2375.976970 
## converged
## # weights:  111
## initial  value 10782.923774 
## iter  10 value 2376.095327
## iter  20 value 2170.549088
## iter  30 value 2163.415262
## iter  40 value 2098.088894
## iter  50 value 2047.029244
## iter  60 value 2039.047454
## iter  70 value 2026.992621
## iter  80 value 1943.058856
## iter  90 value 1822.527048
## iter 100 value 1764.802770
## final  value 1764.802770 
## stopped after 100 iterations
## # weights:  331
## initial  value 5335.257141 
## iter  10 value 2358.226755
## iter  20 value 2302.549006
## iter  30 value 2172.068936
## iter  40 value 2093.786832
## iter  50 value 2056.355211
## iter  60 value 1996.335934
## iter  70 value 1820.762875
## iter  80 value 1743.590981
## iter  90 value 1677.160852
## iter 100 value 1626.653533
## final  value 1626.653533 
## stopped after 100 iterations
## # weights:  551
## initial  value 3724.457301 
## iter  10 value 2363.486088
## iter  20 value 2329.618202
## iter  30 value 2155.160494
## iter  40 value 2127.599779
## iter  50 value 2088.594764
## iter  60 value 2062.393379
## iter  70 value 2047.273018
## iter  80 value 2046.836162
## iter  90 value 2005.656000
## iter 100 value 1897.702858
## final  value 1897.702858 
## stopped after 100 iterations
## # weights:  111
## initial  value 9745.396271 
## final  value 2358.051294 
## converged
## # weights:  331
## initial  value 7844.331542 
## final  value 2375.982646 
## converged
## # weights:  551
## initial  value 9724.329122 
## final  value 2375.985595 
## converged
## # weights:  111
## initial  value 9164.023050 
## final  value 2373.078436 
## converged
## # weights:  331
## initial  value 12173.105910 
## final  value 2373.078436 
## converged
## # weights:  551
## initial  value 7790.741394 
## final  value 2373.078436 
## converged
## # weights:  111
## initial  value 8883.851332 
## iter  10 value 2404.943138
## iter  20 value 2361.093377
## final  value 2361.084924 
## converged
## # weights:  331
## initial  value 15993.137153 
## iter  10 value 2373.485295
## iter  20 value 2373.335210
## final  value 2373.281742 
## converged
## # weights:  551
## initial  value 4975.138154 
## iter  10 value 2367.758402
## iter  20 value 2167.417966
## iter  30 value 2160.747277
## iter  40 value 2143.745047
## iter  50 value 2141.970933
## iter  60 value 2141.048868
## final  value 2140.000566 
## converged
## # weights:  111
## initial  value 9009.415585 
## iter  10 value 2373.081116
## iter  10 value 2373.081115
## iter  10 value 2373.081115
## final  value 2373.081115 
## converged
## # weights:  331
## initial  value 11724.579916 
## final  value 2373.084145 
## converged
## # weights:  551
## initial  value 5969.530058 
## final  value 2373.088183 
## converged
## # weights:  551
## initial  value 8607.860202 
## iter  10 value 3561.875666
## iter  20 value 3561.808840
## iter  30 value 3372.830025
## iter  40 value 3278.909837
## iter  50 value 3248.538016
## iter  60 value 3156.559620
## iter  70 value 3111.394436
## iter  80 value 3106.867094
## iter  90 value 3076.618386
## iter 100 value 3018.374358
## final  value 3018.374358 
## stopped after 100 iterations
# Print the fitted caret object: CV summary and the selected tuning parameters.
Adult_TDA_PC_5.50.5_n4_NN1Fit0
## Neural Network 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11133, 11134, 11133 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa      
##   1     0e+00  0.9449102  0.000000000
##   1     1e-04  0.9453294  0.019603585
##   1     1e-01  0.9464074  0.100201043
##   3     0e+00  0.9450299  0.004079951
##   3     1e-04  0.9449102  0.000000000
##   3     1e-01  0.9490420  0.191372598
##   5     0e+00  0.9449701  0.009531380
##   5     1e-04  0.9449102  0.000000000
##   5     1e-01  0.9491017  0.250196953
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Per-fold resample metrics for the winning model.
Adult_TDA_PC_5.50.5_n4_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.9486258 0.1716326    Fold3
## 2 0.9507814 0.3033336    Fold1
## 3 0.9478980 0.2756247    Fold2
# Keep only the Accuracy column (1-col data frame) for the later fold-wise comparisons.
ad_tda_pc_5.50.5_n4_nn1_fit_re<-Adult_TDA_PC_5.50.5_n4_NN1Fit0$resample[1]

# Dump the final network's weights (108-5-1 nnet; output shown below).
summary(Adult_TDA_PC_5.50.5_n4_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.05     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.04     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     1.62     0.00     0.08    -0.10     0.51     0.01     0.52    -0.05 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.45     0.19     0.01     0.00     0.13     0.81     0.52     0.09 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.48     0.23     0.05     0.07     0.66    -1.14    -0.26    -0.41 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -0.13     0.16    -0.21     0.58    -0.27     0.18    -0.03    -0.68 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.38     1.13     0.89    -0.25     0.09    -0.09     0.00     0.37 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.48     0.15     0.21     0.54    -0.23     0.14     0.48    -0.27 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.16    -0.26     0.82     0.86     0.09     0.00     0.91     0.33 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -0.57     0.40     0.56    -0.15     0.20     0.62    -0.12     1.75 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.01    -0.24     0.12     0.05    -0.04     0.04 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.05    -0.04     0.01     0.06     0.23    -0.16     0.10     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00    -0.10    -0.03     0.00     0.03    -0.08    -0.04     0.01 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.07    -0.04    -0.08    -0.31    -0.01    -0.10    -0.01    -0.01 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.02     0.05    -0.04     0.53    -0.29     0.16     0.04     0.02 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.11     0.01     1.61    -0.02    -0.07 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.01     0.56     0.00    -0.01     0.02     0.00    -0.01     0.01 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.03     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00    -0.01     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##    -0.01     0.00     0.00     0.03     0.06     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.01     0.00     0.00     0.00     0.00     0.00     0.01 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##    -0.01     0.00     0.00     0.00     0.00    -0.01     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.01     0.00     0.01     0.00     0.00     0.01     0.00    -0.01 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.01     0.00     0.00     0.00     0.00     0.00     0.01 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.09    -0.07     0.72     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.01     0.00     0.00 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##     0.00    -0.08     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##     0.00     0.00     0.00     0.06     0.00     0.00     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.00     0.00     0.00     0.00    -0.02     0.00     0.00     0.00 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##    -0.25     0.04    -0.07     0.00     0.00     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00    -0.08     0.00     0.00     0.00     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.01     0.01     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o 
##  0.23  0.23 -5.41 -0.29  1.68 -0.85
# Predict outcome using Adult_TDA_PC_5.50.5_n4_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# Reference labels come from adult_df1 in the held-out test frame; the
# 'Positive' class reported below is ' <=50K'.
ad_tda_pc_5.50.5_n4_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7340  1979
##      >50K      76   373
##                                           
##                Accuracy : 0.7896          
##                  95% CI : (0.7814, 0.7977)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 5.314e-13       
##                                           
##                   Kappa : 0.205           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9898          
##             Specificity : 0.1586          
##          Pos Pred Value : 0.7876          
##          Neg Pred Value : 0.8307          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7514          
##    Detection Prevalence : 0.9540          
##       Balanced Accuracy : 0.5742          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the echo below is an exact duplicate of the display above and
# could be removed without loss.
ad_tda_pc_5.50.5_n4_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7340  1979
##      >50K      76   373
##                                           
##                Accuracy : 0.7896          
##                  95% CI : (0.7814, 0.7977)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 5.314e-13       
##                                           
##                   Kappa : 0.205           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9898          
##             Specificity : 0.1586          
##          Pos Pred Value : 0.7876          
##          Neg Pred Value : 0.8307          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7514          
##    Detection Prevalence : 0.9540          
##       Balanced Accuracy : 0.5742          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics; element 1 is Accuracy (see names in output).
ad_tda_pc_5.50.5_n4_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.896192e-01   2.049595e-01   7.813995e-01   7.976658e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   5.314162e-13   0.000000e+00
# Extract test-set Accuracy (overall[1]) for the test-set difference below.
ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc<-ad_tda_pc_5.50.5_n4_nn1_cf0$overall[1]
ad_tda_pc_5.50.5_n4_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9897519            0.1585884            0.7876382 
##       Neg Pred Value            Precision               Recall 
##            0.8307350            0.7876382            0.9897519 
##                   F1           Prevalence       Detection Rate 
##            0.8772035            0.7592138            0.7514333 
## Detection Prevalence    Balanced Accuracy 
##            0.9540336            0.5741702
# Elements 5:7 of byClass are Precision, Recall, F1 (per the names above).
ad_tda_pc_5.50.5_n4_nn1_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n4_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
# NOTE(review): this header previously said "RF", but the models compared here
# are the nnet fits (ad_nn1_fit_re vs. the TDA-assisted NN fold accuracies).

### 3-fold diff

# Per-fold accuracy difference: non-TDA NN minus TDA-assisted node-4 NN.
# Negative values mean the TDA-assisted model's CV accuracy is higher.
# NOTE(review): the two resample vectors come from models trained on different
# data subsets — confirm the fold-wise pairing is meaningful before comparing.
diff_tda_pca_5.50.5_nn1_n4_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.50.5_n4_nn1_fit_re)
diff_tda_pca_5.50.5_nn1_n4_3_fold
##     Accuracy
## 1 -0.1560761
## 2 -0.1409630
## 3 -0.1516358
## Bayesian Tests 3-fold diff

# Bayesian Sign Test
# ROPE (region of practical equivalence) bounds are -0.01 and 0.01, i.e. a
# 1-percentage-point accuracy difference is treated as negligible.

bst_tda_pca_5.50.5_nn1.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# probRight is 0 here, so the odds ratio is Inf (division by zero).

bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left<-bst_tda_pca_5.50.5_nn1.n4_3_fold$probLeft/bst_tda_pca_5.50.5_nn1.n4_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nn1.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9918
## 
## $winRope
## [1] 0.0082
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument 0.1 is presumably the fold correlation (test/train fraction)
# — confirm against the correlatedBayesianTtest definition used here.

bct_tda_pca_5.50.5_nn1.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.999313
## 
## $rope
## [1] 0.0001611837
## 
## $right
## [1] 0.000525845
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nn1_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold))
#bf_tda_pca_5.50.5_nn1.n4_3_fold

#t_test
# Frequentist one-sample t-test on the three fold differences (df = 2).
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n4_3_fold)
## t = -33.348, df = 2, p-value = 0.000898
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1688546 -0.1302621
## sample estimates:
##  mean of x 
## -0.1495583
### Test set diff
# Single-number difference: non-TDA NN test accuracy minus node-4 TDA NN test
# accuracy. All tests below therefore run on ONE observation.
diff_tda_pca_5.50.5_nn1.n4_test<-(nn1_cf_ov_acc - ad_tda_pc_5.50.5_n4_nn1_cf0_ov_acc)
diff_tda_pca_5.50.5_nn1.n4_test
##    Accuracy 
## -0.02375102
## Bayesian Tests Test set diff

# Bayesian Sign Test (ROPE = [-0.01, 0.01], as in the 3-fold tests above)

bst_tda_pca_5.50.5_nn1.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n4_test
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# probRight is 0, so the ratio is Inf.

bst_tda_pca_5.50.5_nn1.n4_test_odds.left<-bst_tda_pca_5.50.5_nn1.n4_test$probLeft/bst_tda_pca_5.50.5_nn1.n4_test$probRight
bst_tda_pca_5.50.5_nn1.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nn1.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n4_test
## $winLeft
## [1] 0.8415333
## 
## $winRope
## [1] 0.1584667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): with a single observation the correlated t-test has no
# variance estimate, hence the NA results below — this call is uninformative.

bct_tda_pca_5.50.5_nn1.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled; original commented line had unbalanced parentheses and
# omitted the ROPE range — corrected form kept here for reference)
#plot(rope(diff_tda_pca_5.50.5_nn1.n4_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n4_test)) #bf_tda_pca_5.50.5_nn1.n4_test

#t_test (not meaningful for a single observation)
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n4_test))


##Node5

#Neural Network 1
# Train a caret nnet classifier on the node-5 Mapper-cluster subset
# (tda.m_adult_5.50.5.n5.vec), predicting the income label adult_df1.
# fitControl is defined earlier in the document; per the printed output it
# performs 3-fold cross-validation. Model selection metric is Accuracy.
Adult_TDA_PC_5.50.5_n5_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n5.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 8140.345479 
## final  value 3684.136149 
## converged
## # weights:  331
## initial  value 6995.676901 
## final  value 3684.136149 
## converged
## # weights:  551
## initial  value 16142.839292 
## final  value 3684.136149 
## converged
## # weights:  111
## initial  value 5273.940835 
## iter  10 value 166.040071
## iter  20 value 141.482625
## iter  30 value 129.869578
## iter  40 value 127.672163
## iter  50 value 127.592083
## iter  60 value 127.582218
## iter  70 value 127.581954
## iter  80 value 127.581491
## final  value 127.581486 
## converged
## # weights:  331
## initial  value 5104.007497 
## iter  10 value 344.683942
## iter  20 value 158.553864
## iter  30 value 144.729055
## iter  40 value 142.163156
## iter  50 value 141.888125
## iter  60 value 141.884665
## final  value 141.884654 
## converged
## # weights:  551
## initial  value 4065.969904 
## iter  10 value 279.814072
## iter  20 value 158.080398
## iter  30 value 144.442621
## iter  40 value 144.415600
## iter  50 value 144.227084
## iter  60 value 138.602899
## iter  70 value 132.219208
## iter  80 value 132.215246
## iter  90 value 132.208216
## iter 100 value 132.199930
## final  value 132.199930 
## stopped after 100 iterations
## # weights:  111
## initial  value 4863.301460 
## iter  10 value 3684.161125
## final  value 143.468803 
## converged
## # weights:  331
## initial  value 3871.763688 
## iter  10 value 168.167637
## iter  20 value 143.468733
## iter  30 value 143.465361
## iter  40 value 143.460184
## final  value 143.460041 
## converged
## # weights:  551
## initial  value 1810.874698 
## iter  10 value 143.473342
## iter  20 value 143.464590
## final  value 143.464585 
## converged
## # weights:  111
## initial  value 5366.555604 
## final  value 3499.929341 
## converged
## # weights:  331
## initial  value 9109.383314 
## final  value 3499.929341 
## converged
## # weights:  551
## initial  value 5006.613833 
## final  value 3499.929341 
## converged
## # weights:  111
## initial  value 6029.750183 
## iter  10 value 209.135200
## iter  20 value 141.551286
## iter  30 value 141.095911
## final  value 141.095025 
## converged
## # weights:  331
## initial  value 9410.118111 
## iter  10 value 657.145317
## iter  20 value 341.454146
## iter  30 value 324.024718
## iter  40 value 141.097836
## iter  50 value 141.097172
## iter  50 value 141.097172
## iter  60 value 139.178491
## iter  70 value 128.168223
## iter  80 value 120.280241
## final  value 120.279477 
## converged
## # weights:  551
## initial  value 4188.124093 
## iter  10 value 335.069541
## iter  20 value 256.282246
## iter  30 value 216.991302
## iter  40 value 180.439321
## iter  50 value 175.641101
## iter  60 value 131.184524
## iter  70 value 106.424319
## iter  80 value 105.710188
## iter  90 value 104.890573
## iter 100 value 93.035373
## final  value 93.035373 
## stopped after 100 iterations
## # weights:  111
## initial  value 9749.368625 
## iter  10 value 3499.973651
## final  value 137.267351 
## converged
## # weights:  331
## initial  value 8361.939043 
## iter  10 value 138.908253
## iter  20 value 137.266538
## iter  30 value 137.262973
## final  value 137.262947 
## converged
## # weights:  551
## initial  value 9272.388676 
## iter  10 value 3499.980225
## iter  20 value 137.270897
## iter  30 value 137.262677
## final  value 137.262624 
## converged
## # weights:  111
## initial  value 4893.252542 
## final  value 3499.929341 
## converged
## # weights:  331
## initial  value 4306.593417 
## final  value 3499.929341 
## converged
## # weights:  551
## initial  value 7518.358577 
## final  value 3499.929341 
## converged
## # weights:  111
## initial  value 6847.393971 
## iter  10 value 298.550134
## iter  20 value 141.712836
## iter  30 value 139.692939
## iter  40 value 139.191466
## iter  50 value 139.188095
## final  value 139.188085 
## converged
## # weights:  331
## initial  value 7290.372493 
## iter  10 value 196.396755
## iter  20 value 139.837125
## iter  30 value 139.195614
## iter  40 value 139.186471
## iter  50 value 138.541517
## iter  60 value 127.397522
## iter  70 value 122.548703
## iter  80 value 122.452490
## iter  90 value 122.028228
## iter 100 value 121.671367
## final  value 121.671367 
## stopped after 100 iterations
## # weights:  551
## initial  value 2545.235785 
## iter  10 value 140.142632
## iter  20 value 139.144478
## iter  30 value 138.045749
## iter  40 value 138.028370
## iter  50 value 138.018536
## iter  60 value 137.986209
## iter  70 value 137.906716
## iter  80 value 137.906600
## iter  90 value 137.906253
## iter 100 value 137.906098
## final  value 137.906098 
## stopped after 100 iterations
## # weights:  111
## initial  value 4942.055139 
## iter  10 value 3500.312217
## final  value 137.267121 
## converged
## # weights:  331
## initial  value 8756.743013 
## iter  10 value 154.377204
## iter  20 value 137.268065
## iter  30 value 137.263659
## final  value 137.263594 
## converged
## # weights:  551
## initial  value 3260.654554 
## iter  10 value 137.273845
## iter  20 value 137.268184
## iter  30 value 137.207772
## iter  40 value 131.464655
## iter  50 value 128.101129
## iter  60 value 124.182892
## iter  70 value 122.693824
## final  value 122.692758 
## converged
## # weights:  111
## initial  value 4639.640062 
## iter  10 value 216.354776
## final  value 212.827108 
## converged
# Display caret training results for the node-5 TDA-assisted neural network.
# NOTE(review): Kappa is 0 for nearly every grid point despite ~0.998
# accuracy — consistent with a near-degenerate (majority-class) classifier on
# this highly imbalanced node subset; see the confusion matrix further below.
Adult_TDA_PC_5.50.5_n5_NN1Fit0
## Neural Network 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9604, 9602, 9602 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##   1     0e+00  0.9979867  0.0000000
##   1     1e-04  0.9979867  0.0000000
##   1     1e-01  0.9979867  0.0000000
##   3     0e+00  0.9979867  0.0000000
##   3     1e-04  0.9979867  0.0000000
##   3     1e-01  0.9979867  0.0000000
##   5     0e+00  0.9979867  0.0000000
##   5     1e-04  0.9979867  0.0000000
##   5     1e-01  0.9979173  0.0510107
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 1 and decay = 0.1.
# Per-fold resampling metrics for the selected model.
Adult_TDA_PC_5.50.5_n5_NN1Fit0$resample
##    Accuracy Kappa Resample
## 1 0.9981250     0    Fold1
## 2 0.9979175     0    Fold2
## 3 0.9979175     0    Fold3
# Keep only the first column of $resample (per-fold Accuracy) for the
# Bayesian comparisons below.
ad_tda_pc_5.50.5_n5_nn1_fit_re<-Adult_TDA_PC_5.50.5_n5_NN1Fit0$resample[1]

# Inspect the final fitted nnet's architecture and weights (dump follows;
# the hidden-to-output weight printed below is 0.00).
summary(Adult_TDA_PC_5.50.5_n5_NN1Fit0)
## a 108-1-1 network with 111 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o 
## -6.16  0.00
# Predict outcome using Adult_TDA_PC_5.50.5_n5_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n5_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# The ' >50K' prediction row is all zeros below: the node-5 model predicts
# only the majority class (Kappa = 0, Specificity = 0).
ad_tda_pc_5.50.5_n5_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the echo below is an exact duplicate of the display above and
# could be removed without loss.
ad_tda_pc_5.50.5_n5_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics; element 1 is Accuracy (see names in output).
ad_tda_pc_5.50.5_n5_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Extract test-set Accuracy (overall[1]) for the test-set difference below.
ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc<-ad_tda_pc_5.50.5_n5_nn1_cf0$overall[1]
ad_tda_pc_5.50.5_n5_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Elements 5:7 of byClass are Precision, Recall, F1 (per the names above).
ad_tda_pc_5.50.5_n5_nn1_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n5_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
# NOTE(review): this header previously said "RF", but the models compared here
# are the nnet fits (ad_nn1_fit_re vs. the TDA-assisted NN fold accuracies).

### 3-fold diff

# Per-fold accuracy difference: non-TDA NN minus TDA-assisted node-5 NN.
# Negative values mean the TDA-assisted model's CV accuracy is higher.
# NOTE(review): the node-5 model's high CV accuracy comes with Kappa = 0
# (majority-class behavior), so this comparison should be read with care.
diff_tda_pca_5.50.5_nn1_n5_3_fold<-(ad_nn1_fit_re - ad_tda_pc_5.50.5_n5_nn1_fit_re)
diff_tda_pca_5.50.5_nn1_n5_3_fold
##     Accuracy
## 1 -0.2055753
## 2 -0.1880992
## 3 -0.2016554
## Bayesian Tests 3-fold diff

# Bayesian Sign Test (ROPE = [-0.01, 0.01])

bst_tda_pca_5.50.5_nn1.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# probRight is 0 here, so the odds ratio is Inf (division by zero).

bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_nn1.n5_3_fold$probLeft/bst_tda_pca_5.50.5_nn1.n5_3_fold$probRight
bst_tda_pca_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nn1.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9914667
## 
## $winRope
## [1] 0.008533333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument 0.1 is presumably the fold correlation (test/train fraction)
# — confirm against the correlatedBayesianTtest definition used here.

bct_tda_pca_5.50.5_nn1.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.9994746
## 
## $rope
## [1] 9.586431e-05
## 
## $right
## [1] 0.0004295431
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nn1_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
#bf_tda_pca_5.50.5_nn1.n5_3_fold

#t_test
# Frequentist one-sample t-test on the three fold differences (df = 2).
t.test(as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nn1_n5_3_fold)
## t = -37.482, df = 2, p-value = 0.000711
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2212232 -0.1756633
## sample estimates:
##  mean of x 
## -0.1984433
### Test set diff
# Test-set accuracy difference: baseline NN1 test accuracy minus the
# TDA(PCA)-assisted NN1 test accuracy — a single scalar, so the Bayesian
# tests below are degenerate (see NA/NaN results).
diff_tda_pca_5.50.5_nn1.n5_test<-(nn1_cf_ov_acc - ad_tda_pc_5.50.5_n5_nn1_cf0_ov_acc)
diff_tda_pca_5.50.5_nn1.n5_test
##    Accuracy 
## 0.006654382
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the single test-set difference with ROPE [-0.01, 0.01];
# the one observation (0.00665) falls inside the ROPE, hence probRope = 1.
bst_tda_pca_5.50.5_nn1.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# 0/0 division: probLeft and probRight are both 0 above, so odds are NaN.
bst_tda_pca_5.50.5_nn1.n5_test_odds.left<-bst_tda_pca_5.50.5_nn1.n5_test$probLeft/bst_tda_pca_5.50.5_nn1.n5_test$probRight
bst_tda_pca_5.50.5_nn1.n5_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

# Signed-rank test on the same single difference and ROPE bounds.
bsr_tda_pca_5.50.5_nn1.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated Bayesian t-test on a single observation: no variance can be
# estimated, which is consistent with the all-NA result printed below.
bct_tda_pca_5.50.5_nn1.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nn1.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# NOTE(review): the commented-out call below has unbalanced parentheses and
# omits the ROPE range used elsewhere; if re-enabled it should read
# plot(rope(diff_tda_pca_5.50.5_nn1.n5_test, c(-0.01, 0.01))).
#plot(rope(diff_tda_pca_5.50.5_nn1.n5_test)))

#BayesFactor
#bf_tda_pca_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nn1.n5_test)) #bf_tda_pca_5.50.5_nn1.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nn1.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1

#Neural Network 1
# Fit a caret 'nnet' neural network on the TDA-mapper (KDE filter,
# 5 intervals / 50% overlap / 5 bins) node-1 vectorized training data,
# tuning size/decay under the resampling scheme in fitControl (defined
# earlier) and selecting the model on Accuracy.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n1.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 5526.698305 
## final  value 5106.371841 
## converged
## # weights:  331
## initial  value 5248.781173 
## iter  10 value 5098.269886
## final  value 5098.264916 
## converged
## # weights:  551
## initial  value 7455.173102 
## iter  10 value 5092.857774
## final  value 5092.853842 
## converged
## # weights:  111
## initial  value 6443.035962 
## iter  10 value 5106.427022
## iter  10 value 5106.427011
## iter  10 value 5106.427011
## final  value 5106.427011 
## converged
## # weights:  331
## initial  value 5912.284564 
## iter  10 value 4900.323691
## iter  20 value 4666.332674
## iter  30 value 4609.708910
## iter  40 value 4554.979252
## iter  50 value 4536.200380
## iter  60 value 4427.575758
## iter  70 value 4380.027565
## iter  80 value 4369.771840
## final  value 4369.425596 
## converged
## # weights:  551
## initial  value 5765.977215 
## iter  10 value 4984.034887
## iter  20 value 4975.288246
## iter  30 value 4889.093784
## iter  40 value 4795.791603
## iter  50 value 4445.370098
## iter  60 value 4411.188085
## iter  70 value 4407.932530
## iter  80 value 4378.478831
## iter  90 value 3771.155084
## iter 100 value 3253.929669
## final  value 3253.929669 
## stopped after 100 iterations
## # weights:  111
## initial  value 6893.521399 
## iter  10 value 5105.154162
## final  value 5105.136147 
## converged
## # weights:  331
## initial  value 5688.616772 
## iter  10 value 5098.281703
## final  value 5098.280574 
## converged
## # weights:  551
## initial  value 10018.499202 
## iter  10 value 5086.126392
## final  value 5086.108199 
## converged
## # weights:  111
## initial  value 5636.473977 
## iter  10 value 5095.861179
## final  value 5095.859120 
## converged
## # weights:  331
## initial  value 6699.734921 
## iter  10 value 5098.591097
## final  value 5098.564222 
## converged
## # weights:  551
## initial  value 5294.539914 
## iter  10 value 5015.483885
## final  value 5015.473289 
## converged
## # weights:  111
## initial  value 6861.999373 
## iter  10 value 5108.080546
## final  value 5108.077114 
## converged
## # weights:  331
## initial  value 7555.132131 
## iter  10 value 5108.062099
## iter  20 value 5097.497667
## iter  30 value 4663.353197
## iter  40 value 4646.782252
## iter  50 value 4540.362684
## iter  60 value 4477.124651
## iter  70 value 4440.796368
## iter  80 value 4436.604599
## iter  90 value 4436.101803
## iter 100 value 4416.394894
## final  value 4416.394894 
## stopped after 100 iterations
## # weights:  551
## initial  value 5113.853322 
## iter  10 value 5094.648420
## iter  20 value 4696.882502
## iter  30 value 4612.188554
## iter  40 value 4597.323036
## iter  50 value 4521.838321
## iter  60 value 4486.918271
## iter  70 value 4332.145529
## iter  80 value 4296.410882
## iter  90 value 4024.071819
## iter 100 value 3888.071389
## final  value 3888.071389 
## stopped after 100 iterations
## # weights:  111
## initial  value 5089.661545 
## final  value 5023.733875 
## converged
## # weights:  331
## initial  value 6369.146671 
## iter  10 value 5097.241225
## final  value 5097.234305 
## converged
## # weights:  551
## initial  value 5438.701803 
## final  value 5087.762402 
## converged
## # weights:  111
## initial  value 5406.082632 
## iter  10 value 5069.842041
## final  value 5069.841978 
## converged
## # weights:  331
## initial  value 5891.953652 
## iter  10 value 5098.274196
## final  value 5098.264913 
## converged
## # weights:  551
## initial  value 5079.150774 
## final  value 4973.467272 
## converged
## # weights:  111
## initial  value 5456.756104 
## iter  10 value 5106.461195
## final  value 5106.428428 
## converged
## # weights:  331
## initial  value 5638.489381 
## iter  10 value 5085.600093
## iter  20 value 4753.374219
## iter  30 value 4549.825798
## iter  40 value 4498.514602
## iter  50 value 4454.672100
## iter  60 value 4313.062623
## iter  70 value 4056.540260
## iter  80 value 3620.227700
## iter  90 value 3442.961651
## iter 100 value 3354.111328
## final  value 3354.111328 
## stopped after 100 iterations
## # weights:  551
## initial  value 5850.303955 
## iter  10 value 4963.364954
## iter  20 value 4913.600479
## iter  30 value 4726.232618
## iter  40 value 4697.743133
## iter  50 value 4654.011990
## iter  60 value 4596.259691
## iter  70 value 4572.060065
## iter  80 value 4436.110707
## iter  90 value 4404.714496
## iter 100 value 4375.445317
## final  value 4375.445317 
## stopped after 100 iterations
## # weights:  111
## initial  value 5229.182132 
## final  value 5106.373518 
## converged
## # weights:  331
## initial  value 5953.190145 
## iter  10 value 5028.164818
## iter  20 value 5026.861945
## final  value 5026.861499 
## converged
## # weights:  551
## initial  value 8041.185505 
## final  value 5067.122600 
## converged
## # weights:  551
## initial  value 10963.089329 
## iter  10 value 7498.318795
## iter  20 value 7054.542786
## iter  30 value 6813.553502
## iter  40 value 6790.106568
## iter  50 value 6784.170997
## iter  60 value 6779.054410
## iter  70 value 6776.057442
## iter  80 value 6744.074435
## iter  90 value 6572.303336
## iter 100 value 6364.221561
## final  value 6364.221561 
## stopped after 100 iterations
# Print the caret training summary (tuning-grid results and chosen
# size/decay), echoed below.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0
## Neural Network 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8924, 8926, 8924 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa      
##   1     0e+00  0.7422126  0.008078572
##   1     1e-04  0.7436327  0.016255750
##   1     1e-01  0.7407933  0.000000000
##   3     0e+00  0.7414657  0.003837379
##   3     1e-04  0.7431088  0.013134662
##   3     1e-01  0.8012242  0.407262917
##   5     0e+00  0.7481888  0.042258084
##   5     1e-04  0.7431838  0.013601123
##   5     1e-01  0.8110834  0.422526001
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 5 and decay = 0.1.
# Per-fold resampling accuracies of the selected model.
Adult_TDA_KDE_5.50.5_n1_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.7985660 0.3541570    Fold3
## 2 0.8371051 0.6044316    Fold1
## 3 0.7975790 0.3089894    Fold2
# Keep only the Accuracy column (a 1-column data.frame) for the Bayesian
# fold-wise comparisons below.
ad_tda_kde_5.50.5_n1_nn1_fit_re<-Adult_TDA_KDE_5.50.5_n1_NN1Fit0$resample[1]

# Inspect the final fitted nnet weights of the tuned model (echoed below).
summary(Adult_TDA_KDE_5.50.5_n1_NN1Fit0)
## a 108-5-1 network with 551 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.29     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##    -0.87     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.32     2.23     0.24     0.06     0.05     0.00     0.15    -0.10 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##    -0.09     0.00     0.00     0.00     0.14     0.16    -0.13     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00    -0.01     0.00    -0.17     0.15    -0.06    -0.02    -0.09 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.02     0.00     0.05     0.29     2.37    -0.03     0.00     0.08 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.35    -0.04    -0.04     0.24     0.00     0.00     0.07 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.14    -0.03     0.01    -0.10    -0.06     0.00     0.30     0.09 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##    -0.19     0.00    -0.17     0.07     0.09    -0.14     0.26     0.03 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.02     0.00     0.11    -0.01     0.00     0.22    -0.14     0.45 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##    -0.09     0.00     5.65    -0.15     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.18     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.10     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00    -0.04     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00    -0.01     0.00     0.00     0.05     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.44     0.00    -0.26 
##    b->h4   i1->h4   i2->h4   i3->h4   i4->h4   i5->h4   i6->h4   i7->h4 
##    -0.04    -1.55     0.00     0.04     0.00     0.00    -0.01    -0.03 
##   i8->h4   i9->h4  i10->h4  i11->h4  i12->h4  i13->h4  i14->h4  i15->h4 
##    -0.04     0.00     0.00     0.00    -0.02     0.01     0.00     0.00 
##  i16->h4  i17->h4  i18->h4  i19->h4  i20->h4  i21->h4  i22->h4  i23->h4 
##     0.00    -0.02     0.00     0.00     0.01    -0.05     0.03    -0.03 
##  i24->h4  i25->h4  i26->h4  i27->h4  i28->h4  i29->h4  i30->h4  i31->h4 
##     0.03     0.00     0.00     0.00    -0.09    -0.02     0.00     0.07 
##  i32->h4  i33->h4  i34->h4  i35->h4  i36->h4  i37->h4  i38->h4  i39->h4 
##     0.00    -0.09     0.00     0.00     0.00    -0.02     0.00     0.01 
##  i40->h4  i41->h4  i42->h4  i43->h4  i44->h4  i45->h4  i46->h4  i47->h4 
##    -0.01    -0.03    -0.02     0.00     0.00     0.00     0.01    -0.01 
##  i48->h4  i49->h4  i50->h4  i51->h4  i52->h4  i53->h4  i54->h4  i55->h4 
##     0.05     0.00    -0.01     0.07    -0.07    -0.04     0.00     0.00 
##  i56->h4  i57->h4  i58->h4  i59->h4  i60->h4  i61->h4  i62->h4  i63->h4 
##    -0.01     0.00     0.01     0.00     0.00    -0.06    -0.07     0.03 
##  i64->h4  i65->h4  i66->h4  i67->h4  i68->h4  i69->h4  i70->h4  i71->h4 
##     0.11     0.00    -1.05     0.00     0.01     0.00     0.00     0.00 
##  i72->h4  i73->h4  i74->h4  i75->h4  i76->h4  i77->h4  i78->h4  i79->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h4  i81->h4  i82->h4  i83->h4  i84->h4  i85->h4  i86->h4  i87->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.01     0.00 
##  i88->h4  i89->h4  i90->h4  i91->h4  i92->h4  i93->h4  i94->h4  i95->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h4  i97->h4  i98->h4  i99->h4 i100->h4 i101->h4 i102->h4 i103->h4 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4 
##     0.00     0.00    -0.05     0.00     0.00 
##    b->h5   i1->h5   i2->h5   i3->h5   i4->h5   i5->h5   i6->h5   i7->h5 
##    -0.22    -0.04    -0.06     0.04     0.02     0.00    -0.25     0.03 
##   i8->h5   i9->h5  i10->h5  i11->h5  i12->h5  i13->h5  i14->h5  i15->h5 
##     0.00     0.00     0.00     0.00    -0.09    -0.16    -0.02     0.00 
##  i16->h5  i17->h5  i18->h5  i19->h5  i20->h5  i21->h5  i22->h5  i23->h5 
##     0.01    -0.04    -0.03    -0.02    -0.02     0.01     0.07    -0.02 
##  i24->h5  i25->h5  i26->h5  i27->h5  i28->h5  i29->h5  i30->h5  i31->h5 
##     0.10     0.00     0.08    -0.09     0.13    -0.06     0.00     0.23 
##  i32->h5  i33->h5  i34->h5  i35->h5  i36->h5  i37->h5  i38->h5  i39->h5 
##    -0.01    -0.34     0.00    -0.04    -0.05    -0.05     0.00    -0.05 
##  i40->h5  i41->h5  i42->h5  i43->h5  i44->h5  i45->h5  i46->h5  i47->h5 
##     0.10    -0.01    -0.03    -0.04    -0.12    -0.01     0.14     0.00 
##  i48->h5  i49->h5  i50->h5  i51->h5  i52->h5  i53->h5  i54->h5  i55->h5 
##    -0.06    -0.01    -0.04     0.23    -0.19    -0.02    -0.20    -0.05 
##  i56->h5  i57->h5  i58->h5  i59->h5  i60->h5  i61->h5  i62->h5  i63->h5 
##     0.01    -0.02     0.00     0.00    -0.01    -0.19    -0.32     0.09 
##  i64->h5  i65->h5  i66->h5  i67->h5  i68->h5  i69->h5  i70->h5  i71->h5 
##     0.00     0.00     0.01     0.01     0.00     0.00     0.00     0.00 
##  i72->h5  i73->h5  i74->h5  i75->h5  i76->h5  i77->h5  i78->h5  i79->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h5  i81->h5  i82->h5  i83->h5  i84->h5  i85->h5  i86->h5  i87->h5 
##     0.00     0.00     0.00     0.00     0.00     0.01     0.00     0.00 
##  i88->h5  i89->h5  i90->h5  i91->h5  i92->h5  i93->h5  i94->h5  i95->h5 
##     0.00     0.00     0.00     0.00     0.00     0.05     0.00     0.00 
##  i96->h5  i97->h5  i98->h5  i99->h5 i100->h5 i101->h5 i102->h5 i103->h5 
##     0.00     0.00     0.00     0.00     0.00     0.00    -0.01     0.01 
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5 
##     0.00     0.00    -0.27     0.00     0.00 
##  b->o h1->o h2->o h3->o h4->o h5->o 
##  1.43  1.43 -1.47 -2.93  0.68  1.08
# Predict outcome using Adult_TDA_KDE_5.50.5_n1_NN1Fit0 from training data based on testing data
# NOTE: pred0 is reused/overwritten in each model section of this script.
pred0 <- predict(Adult_TDA_KDE_5.50.5_n1_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n1_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n1_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7373  1914
##      >50K      43   438
##                                           
##                Accuracy : 0.7997          
##                  95% CI : (0.7916, 0.8076)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2477          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9942          
##             Specificity : 0.1862          
##          Pos Pred Value : 0.7939          
##          Neg Pred Value : 0.9106          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7548          
##    Detection Prevalence : 0.9508          
##       Balanced Accuracy : 0.5902          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the confusion matrix already shown
# immediately above; this repeated statement can be removed.
ad_tda_kde_5.50.5_n1_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7373  1914
##      >50K      43   438
##                                           
##                Accuracy : 0.7997          
##                  95% CI : (0.7916, 0.8076)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2477          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9942          
##             Specificity : 0.1862          
##          Pos Pred Value : 0.7939          
##          Neg Pred Value : 0.9106          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7548          
##    Detection Prevalence : 0.9508          
##       Balanced Accuracy : 0.5902          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics; element 1 is Accuracy.
ad_tda_kde_5.50.5_n1_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.996519e-01   2.477023e-01   7.915743e-01   8.075505e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   7.963235e-22   0.000000e+00
# Store the test-set Accuracy for the later baseline-vs-TDA comparisons.
ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_nn1_cf0$overall[1]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.50.5_n1_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9942017            0.1862245            0.7939055 
##       Neg Pred Value            Precision               Recall 
##            0.9106029            0.7939055            0.9942017 
##                   F1           Prevalence       Detection Rate 
##            0.8828354            0.7592138            0.7548116 
## Detection Prevalence    Balanced Accuracy 
##            0.9507576            0.5902131
# Store Precision, Recall and F1 (byClass elements 5:7) for later comparison.
ad_tda_kde_5.50.5_n1_nn1_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted vs. tda-assisted NN1 classifiers
# (the original header said "RF", apparently copied from the random-forest
# section; the objects compared below are NN1 fits)

### 3-fold diff

# Per-fold CV accuracy difference: baseline NN1 resample accuracies minus
# the TDA(KDE)-assisted NN1 resample accuracies.
diff_tda_kde_5.50.5_nn1_n1_3_fold<-(ad_nn1_fit_re - ad_tda_kde_5.50.5_n1_nn1_fit_re)
diff_tda_kde_5.50.5_nn1_n1_3_fold
##       Accuracy
## 1 -0.006016296
## 2 -0.027286713
## 3 -0.001316844
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# Bayesian sign test on the per-fold differences with ROPE [-0.01, 0.01].
bst_tda_kde_5.50.5_nn1.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n1_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Odds in favour of "left" (TDA-assisted better); Inf because probRight
# is exactly 0 in the sign-test result above.
bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_nn1.n1_3_fold$probLeft/bst_tda_kde_5.50.5_nn1.n1_3_fold$probRight
bst_tda_kde_5.50.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Bayesian signed-rank test on the same differences and ROPE bounds.
bsr_tda_kde_5.50.5_nn1.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n1_3_fold
## $winLeft
## [1] 0.4184
## 
## $winRope
## [1] 0.5816
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated Bayesian t-test with correlation parameter 0.1 and
# ROPE [-0.01, 0.01].
# NOTE(review): as elsewhere, confirm 0.1 (vs. ~1/3 for 3-fold CV) is the
# intended correlation parameter.
bct_tda_kde_5.50.5_nn1.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n1_3_fold
## $left
## [1] 0.5586103
## 
## $rope
## [1] 0.3690579
## 
## $right
## [1] 0.07233177
# Rope Plot
# Visualize the posterior of the differences against the ROPE (bayestestR).
plot(rope(diff_tda_kde_5.50.5_nn1_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
#bf_tda_kde_5.50.5_nn1.n1_3_fold

#t_test
# Frequentist one-sample t-test on the fold differences, for comparison.
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n1_3_fold)
## t = -1.4444, df = 2, p-value = 0.2855
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.04591557  0.02283567
## sample estimates:
##   mean of x 
## -0.01153995
### Test set diff
# Test-set accuracy difference: baseline NN1 test accuracy minus the
# TDA(KDE)-assisted NN1 test accuracy, mirroring the PCA section above.
# FIX: the original subtracted from svm_cf_ov_acc (the SVM baseline) — a
# copy/paste slip; this section and every parallel NN1 comparison (see the
# PCA test-set diff) use nn1_cf_ov_acc. The echoed value below (0.0513923)
# came from the erroneous SVM comparison and will change on re-run.
diff_tda_kde_5.50.5_nn1.n1_test<-(nn1_cf_ov_acc - ad_tda_kde_5.50.5_n1_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n1_test
##  Accuracy 
## 0.0513923
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on the single test-set difference with ROPE [-0.01, 0.01].
bst_tda_kde_5.50.5_nn1.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# probLeft is 0 above, so the left odds are 0.
bst_tda_kde_5.50.5_nn1.n1_test_odds.left<-bst_tda_kde_5.50.5_nn1.n1_test$probLeft/bst_tda_kde_5.50.5_nn1.n1_test$probRight
bst_tda_kde_5.50.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank test on the same single difference and ROPE bounds.
bsr_tda_kde_5.50.5_nn1.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1605333
## 
## $winRight
## [1] 0.8394667
# Bayesian Correlated Test

# Correlated Bayesian t-test on a single observation: no variance can be
# estimated, which is consistent with the all-NA result printed below.
bct_tda_kde_5.50.5_nn1.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# NOTE(review): the commented-out call below has unbalanced parentheses and
# omits the ROPE range used elsewhere; if re-enabled it should read
# plot(rope(diff_tda_kde_5.50.5_nn1.n1_test, c(-0.01, 0.01))).
#plot(rope(diff_tda_kde_5.50.5_nn1.n1_test)))

#BayesFactor
# NOTE(review): the trailing name "bf_tda_pca_5.50.5_nn1.n1_test" is a
# copy/paste remnant from the PCA section (this is the KDE section).
#bf_tda_kde_5.50.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n1_test)) #bf_tda_pca_5.50.5_nn1.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n1_test))

##Node2

#Neural Network 1
# Fit the caret 'nnet' model for the second KDE-mapper node, using the
# same fitControl scheme and Accuracy-based tuning as Node1.
# NOTE(review): the section is labelled Node2 and the result object is
# named ..._n2_..., but the training data is tda.m_kde_adult_5.50.5.n3.vec
# (node 3). Confirm whether node 3's vectorization is intentional here or
# whether ...n2.vec was meant.
Adult_TDA_KDE_5.50.5_n2_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n3.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 5930.683450 
## iter  10 value 4245.284346
## final  value 4245.201290 
## converged
## # weights:  331
## initial  value 6177.305703 
## iter  10 value 4558.071222
## final  value 4558.069553 
## converged
## # weights:  551
## initial  value 6763.207906 
## final  value 4536.086163 
## converged
## # weights:  111
## initial  value 5432.666658 
## iter  10 value 4569.717617
## final  value 4569.714463 
## converged
## # weights:  331
## initial  value 6147.890732 
## iter  10 value 4536.136799
## iter  20 value 4230.299486
## iter  30 value 3723.255967
## iter  40 value 3040.259416
## iter  50 value 2876.935687
## iter  60 value 2755.809998
## iter  70 value 2667.253969
## iter  80 value 2626.619230
## iter  90 value 2605.630950
## iter 100 value 2596.702578
## final  value 2596.702578 
## stopped after 100 iterations
## # weights:  551
## initial  value 13190.555196 
## iter  10 value 4540.636995
## iter  20 value 4280.772679
## iter  30 value 4215.750301
## iter  40 value 4183.082260
## iter  50 value 4174.225978
## iter  60 value 4172.830365
## iter  70 value 4149.006510
## iter  80 value 4094.267350
## iter  90 value 4038.300268
## iter 100 value 3611.170139
## final  value 3611.170139 
## stopped after 100 iterations
## # weights:  111
## initial  value 6189.887267 
## iter  10 value 4561.954327
## final  value 4561.950369 
## converged
## # weights:  331
## initial  value 4571.984179 
## final  value 4569.673648 
## converged
## # weights:  551
## initial  value 7086.593553 
## iter  10 value 4560.672329
## final  value 4560.668546 
## converged
## # weights:  111
## initial  value 6368.448984 
## final  value 4569.667984 
## converged
## # weights:  331
## initial  value 6869.756319 
## iter  10 value 4552.907759
## final  value 4552.905853 
## converged
## # weights:  551
## initial  value 4581.189338 
## iter  10 value 4563.231076
## final  value 4563.227810 
## converged
## # weights:  111
## initial  value 9105.335753 
## iter  10 value 4569.760549
## iter  20 value 4569.714993
## final  value 4569.714517 
## converged
## # weights:  331
## initial  value 5016.469773 
## iter  10 value 4286.211602
## iter  20 value 4232.321762
## iter  30 value 4192.595077
## iter  40 value 4133.398231
## iter  50 value 4119.000614
## iter  60 value 3995.410127
## iter  70 value 3587.180721
## iter  80 value 3128.014790
## iter  90 value 3058.844529
## iter 100 value 2954.094340
## final  value 2954.094340 
## stopped after 100 iterations
## # weights:  551
## initial  value 4672.344253 
## iter  10 value 4569.950375
## iter  20 value 4569.701868
## final  value 4569.699022 
## converged
## # weights:  111
## initial  value 6983.164070 
## final  value 4569.670079 
## converged
## # weights:  331
## initial  value 8836.452966 
## iter  10 value 4500.327090
## final  value 4500.267674 
## converged
## # weights:  551
## initial  value 5061.019809 
## final  value 4530.966531 
## converged
## # weights:  111
## initial  value 7247.744150 
## iter  10 value 4567.424743
## final  value 4567.415698 
## converged
## # weights:  331
## initial  value 6033.259818 
## iter  10 value 4545.477051
## final  value 4545.470615 
## converged
## # weights:  551
## initial  value 5243.738886 
## final  value 4545.476644 
## converged
## # weights:  111
## initial  value 5493.113260 
## iter  10 value 4568.752349
## final  value 4568.750038 
## converged
## # weights:  331
## initial  value 9145.208968 
## iter  10 value 4568.764395
## iter  20 value 4568.734837
## final  value 4568.734565 
## converged
## # weights:  551
## initial  value 4797.094928 
## iter  10 value 4567.944042
## iter  20 value 4551.149530
## iter  30 value 4229.303227
## iter  40 value 4187.004430
## iter  50 value 4175.337976
## iter  60 value 4159.809359
## iter  70 value 4156.397556
## iter  80 value 4153.754334
## iter  90 value 4089.733266
## iter 100 value 3831.012797
## final  value 3831.012797 
## stopped after 100 iterations
## # weights:  111
## initial  value 8212.545037 
## final  value 4568.705829 
## converged
## # weights:  331
## initial  value 5869.733883 
## iter  10 value 4567.439861
## final  value 4567.430222 
## converged
## # weights:  551
## initial  value 4704.895635 
## iter  10 value 4368.942914
## final  value 4368.932833 
## converged
## # weights:  331
## initial  value 6962.811008 
## iter  10 value 6718.625618
## iter  20 value 6693.873473
## iter  30 value 6520.663645
## iter  40 value 6390.530236
## iter  50 value 6306.136667
## iter  60 value 6270.485697
## iter  70 value 6129.785849
## iter  80 value 5901.303754
## iter  90 value 5184.129302
## iter 100 value 5010.874846
## final  value 5010.874846 
## stopped after 100 iterations
# Print the caret training summary (tuning-grid results and chosen
# size/decay), echoed below.
Adult_TDA_KDE_5.50.5_n2_NN1Fit0
## Neural Network 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7756, 7756, 7756 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa      
##   1     0e+00  0.7359464  0.069090191
##   1     1e-04  0.7243424  0.001801554
##   1     1e-01  0.7239986  0.000000000
##   3     0e+00  0.7258037  0.009441320
##   3     1e-04  0.7260615  0.010987016
##   3     1e-01  0.7835654  0.367469242
##   5     0e+00  0.7265773  0.013448250
##   5     1e-04  0.7337975  0.051698650
##   5     1e-01  0.7539969  0.172924769
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Per-fold resampling accuracies of the selected model.
Adult_TDA_KDE_5.50.5_n2_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8269727 0.5552632    Fold1
## 2 0.7998969 0.5471445    Fold2
## 3 0.7238267 0.0000000    Fold3
# Keep only the Accuracy column (a 1-column data.frame) for the Bayesian
# fold-wise comparisons that follow.
ad_tda_kde_5.50.5_n2_nn1_fit_re<-Adult_TDA_KDE_5.50.5_n2_NN1Fit0$resample[1]

# Inspect the final fitted nnet weights of the tuned model (echoed below).
summary(Adult_TDA_KDE_5.50.5_n2_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.18     0.18    -2.47     3.77    -0.35    -0.03     0.84     3.49 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##    -4.36    -0.74     0.03     0.00     0.00    -0.28    -5.34     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     7.40     4.03    12.79     0.00    -1.94 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##   -18.76     0.00     0.00     2.28    -2.62    -0.94     0.59     4.24 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##    -0.63    -0.69    -1.45    -0.94    -2.50     0.13    -0.18     0.53 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     3.84    -8.50    -2.26     0.80    -3.95     0.10     1.96     5.20 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     2.11     2.03     0.86     1.16    -0.81    -0.53    -2.17    -1.77 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     4.30    -2.39     2.41     1.50    -2.63     1.29     0.39    -0.21 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.18    -1.80     0.86     1.17    -0.20    -0.11 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.16    -0.25    -0.70     0.07     1.80     0.61     2.11    -1.28 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##    -0.44    -0.08     0.00     0.08    -0.09    -0.13    -0.34     0.43 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.45    -0.60     0.45     0.19    -0.10    -1.28     0.16     0.04 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##    -0.16     2.85    -0.01    -0.27    -0.63    -0.02     0.53    -0.90 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##    -0.12    -0.03    -0.66    -1.68     0.08 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.64     1.70     0.08     0.26    -0.40     0.00     0.17     0.07 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.65    -0.26     0.07     0.00     0.00     0.00     0.02     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00    -0.11    -0.01     2.13     0.00    -0.43 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##    -1.57     0.00     0.00     0.60     6.63    -1.60     0.00     2.80 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00    -0.10     0.27    -0.74     0.08    -0.58     0.00     0.46 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.79     0.48     0.07     0.01     0.13     0.00     0.92     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -0.34     0.19     0.01     1.57    -0.88    -0.27     0.23    -1.24 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     1.23     0.01     0.00    -0.19     0.03     0.79     0.85    -0.21 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##    -0.10     0.00     6.29    -0.17     0.00     0.04     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -0.13     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.07     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.01     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.81     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##   b->o  h1->o  h2->o  h3->o 
##   0.96   2.60  -4.09   0.95
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# Reference labels are the income class (adult_df1) of the one-hot test set;
# the positive class is " <=50K" (see the printed statistics below).
ad_tda_kde_5.50.5_n2_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6131   967
##      >50K    1285  1385
##                                          
##                Accuracy : 0.7695         
##                  95% CI : (0.761, 0.7778)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.009036       
##                                          
##                   Kappa : 0.3972         
##                                          
##  Mcnemar's Test P-Value : 2.39e-11       
##                                          
##             Sensitivity : 0.8267         
##             Specificity : 0.5889         
##          Pos Pred Value : 0.8638         
##          Neg Pred Value : 0.5187         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6277         
##    Detection Prevalence : 0.7267         
##       Balanced Accuracy : 0.7078         
##                                          
##        'Positive' Class :  <=50K         
## 
# NOTE(review): duplicate of the confusion-matrix print a few lines above --
# identical output; harmless but likely an editing leftover that could be removed.
ad_tda_kde_5.50.5_n2_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6131   967
##      >50K    1285  1385
##                                          
##                Accuracy : 0.7695         
##                  95% CI : (0.761, 0.7778)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.009036       
##                                          
##                   Kappa : 0.3972         
##                                          
##  Mcnemar's Test P-Value : 2.39e-11       
##                                          
##             Sensitivity : 0.8267         
##             Specificity : 0.5889         
##          Pos Pred Value : 0.8638         
##          Neg Pred Value : 0.5187         
##              Prevalence : 0.7592         
##          Detection Rate : 0.6277         
##    Detection Prevalence : 0.7267         
##       Balanced Accuracy : 0.7078         
##                                          
##        'Positive' Class :  <=50K         
## 
# Overall test-set statistics (accuracy, kappa, CI, no-information rate, McNemar)
ad_tda_kde_5.50.5_n2_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.694513e-01   3.972488e-01   7.609679e-01   7.777737e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.035912e-03   2.389772e-11
# Save the scalar test-set accuracy (overall[1]) for the test-set diff below
ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_nn1_cf0$overall[1]
# Per-class statistics for the positive class (" <=50K")
ad_tda_kde_5.50.5_n2_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8267260            0.5888605            0.8637644 
##       Neg Pred Value            Precision               Recall 
##            0.5187266            0.8637644            0.8267260 
##                   F1           Prevalence       Detection Rate 
##            0.8448395            0.7592138            0.6276618 
## Detection Prevalence    Balanced Accuracy 
##            0.7266585            0.7077933
# Elements 5:7 of byClass are Precision, Recall, and F1
ad_tda_kde_5.50.5_n2_nn1_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
###### (comment previously said "RF"; the objects compared here are the nnet fits, per the *_nn1_* names)

### 3-fold diff

# Per-fold accuracy difference: baseline NN resamples minus TDA-assisted NN resamples
diff_tda_kde_5.50.5_nn1_n2_3_fold<-(ad_nn1_fit_re - ad_tda_kde_5.50.5_n2_nn1_fit_re)
diff_tda_kde_5.50.5_nn1_n2_3_fold
##       Accuracy
## 1 -0.034422976
## 2  0.009921519
## 3  0.072435459
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE of [-0.01, 0.01]: accuracy differences within one percentage point are
# treated as practically equivalent
bst_tda_kde_5.50.5_nn1.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n2_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

# Odds that the baseline beats the TDA-assisted model (P(left) / P(right))
bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_nn1.n2_3_fold$probLeft/bst_tda_kde_5.50.5_nn1.n2_3_fold$probRight
bst_tda_kde_5.50.5_nn1.n2_3_fold_odds.left
## [1] 1
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nn1.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n2_3_fold
## $winLeft
## [1] 0.3036333
## 
## $winRope
## [1] 0.2013333
## 
## $winRight
## [1] 0.4950333
# Bayesian Correlated Test

# The 0.1 is presumably the fold-overlap correlation parameter (rho) of the
# correlated t-test -- TODO confirm against correlatedBayesianTtest's definition
# earlier in this document
bct_tda_kde_5.50.5_nn1.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n2_3_fold
## $left
## [1] 0.2716939
## 
## $rope
## [1] 0.1696608
## 
## $right
## [1] 0.5586452
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nn1_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
#bf_tda_kde_5.50.5_nn1.n2_3_fold

#t_test
# Frequentist one-sample t-test on the per-fold differences (n = 3 folds)
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n2_3_fold)
## t = 0.51549, df = 2, p-value = 0.6575
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1173855  0.1493415
## sample estimates:
## mean of x 
##  0.015978
### Test set diff
# NOTE(review): the 3-fold diff above subtracts from the NN baseline
# (ad_nn1_fit_re), but this test-set diff subtracts from svm_cf_ov_acc, the
# SVM baseline -- confirm this is intentional and not a copy-paste slip.
diff_tda_kde_5.50.5_nn1.n2_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n2_test
##   Accuracy 
## 0.08159296
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Single observation here (one test-set accuracy difference), so these tests
# have very little information to work with
bst_tda_kde_5.50.5_nn1.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nn1.n2_test_odds.left<-bst_tda_kde_5.50.5_nn1.n2_test$probLeft/bst_tda_kde_5.50.5_nn1.n2_test$probRight
bst_tda_kde_5.50.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nn1.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1581333
## 
## $winRight
## [1] 0.8418667
# Bayesian Correlated Test

# With a single difference the correlated t-test has zero degrees of freedom,
# hence the NA results printed below
bct_tda_kde_5.50.5_nn1.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# (disabled: rope() is not meaningful for a single value; original commented
# line also had an unbalanced ")))" and omitted the ROPE range)
#plot(rope(diff_tda_kde_5.50.5_nn1.n2_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n2_test)) #bf_tda_pca_5.50.5_nn1.n2_test

#t_test
# (disabled: a one-sample t-test needs more than one observation)
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n2_test))

##Node3

# Neural Network 1: caret-tuned nnet on the node-3 TDA-KDE mapper feature set.
# fitControl (defined earlier in the document) supplies 3-fold CV; caret's
# default nnet grid tunes size and decay, selecting the best model by Accuracy.
Adult_TDA_KDE_5.50.5_n3_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n3.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 5111.768353 
## final  value 4569.990984 
## converged
## # weights:  331
## initial  value 6113.230122 
## iter  10 value 4567.422226
## final  value 4567.415688 
## converged
## # weights:  551
## initial  value 5390.153672 
## final  value 4569.990984 
## converged
## # weights:  111
## initial  value 5858.802256 
## iter  10 value 4569.862744
## iter  20 value 4569.493569
## iter  20 value 4569.493565
## iter  20 value 4569.493545
## final  value 4569.493545 
## converged
## # weights:  331
## initial  value 6291.630396 
## iter  10 value 4285.824677
## iter  20 value 4264.917247
## iter  30 value 4261.403305
## iter  40 value 4192.978770
## iter  50 value 4124.465058
## iter  60 value 4011.515063
## iter  70 value 3721.076385
## iter  80 value 3176.785477
## iter  90 value 2978.231608
## iter 100 value 2854.006342
## final  value 2854.006342 
## stopped after 100 iterations
## # weights:  551
## initial  value 4743.371175 
## iter  10 value 4535.542791
## iter  20 value 4278.385005
## iter  30 value 4256.005933
## iter  40 value 4252.347978
## iter  50 value 4247.616447
## iter  60 value 4157.823899
## iter  70 value 4100.851549
## iter  80 value 4080.399334
## iter  90 value 4073.240521
## iter 100 value 4062.148586
## final  value 4062.148586 
## stopped after 100 iterations
## # weights:  111
## initial  value 5870.725150 
## iter  10 value 4504.513623
## final  value 4504.512577 
## converged
## # weights:  331
## initial  value 5056.378901 
## final  value 4544.198727 
## converged
## # weights:  551
## initial  value 4827.363092 
## iter  10 value 4264.968614
## iter  20 value 4251.609850
## iter  30 value 4226.223515
## iter  40 value 4197.273919
## iter  50 value 4126.666326
## iter  60 value 4090.425997
## iter  70 value 4086.823980
## iter  80 value 4086.779035
## iter  90 value 4086.762395
## iter 100 value 4086.734092
## final  value 4086.734092 
## stopped after 100 iterations
## # weights:  111
## initial  value 5057.833550 
## final  value 4521.809004 
## converged
## # weights:  331
## initial  value 4647.455245 
## final  value 4568.380621 
## converged
## # weights:  551
## initial  value 5119.972994 
## iter  10 value 4567.105034
## final  value 4567.092943 
## converged
## # weights:  111
## initial  value 5825.356477 
## iter  10 value 4567.868211
## iter  20 value 4327.892843
## iter  30 value 4326.095332
## iter  40 value 4275.518987
## iter  50 value 4275.513270
## iter  60 value 4275.483372
## iter  70 value 4260.705445
## iter  80 value 4249.317515
## iter  90 value 4249.311471
## final  value 4249.311070 
## converged
## # weights:  331
## initial  value 6170.957433 
## iter  10 value 4529.055743
## iter  20 value 4297.748446
## iter  30 value 4296.307591
## iter  40 value 4296.279113
## iter  50 value 4295.901059
## iter  60 value 4295.753028
## iter  70 value 4260.181726
## iter  80 value 4259.593290
## final  value 4259.515332 
## converged
## # weights:  551
## initial  value 5390.350278 
## iter  10 value 4552.917903
## iter  20 value 4550.842158
## iter  30 value 4335.217152
## iter  40 value 4261.673945
## iter  50 value 4136.906258
## iter  60 value 3475.642467
## iter  70 value 3258.259101
## iter  80 value 3218.795898
## iter  90 value 3201.442504
## iter 100 value 2993.550390
## final  value 2993.550390 
## stopped after 100 iterations
## # weights:  111
## initial  value 6949.952341 
## iter  10 value 4556.803008
## final  value 4556.798192 
## converged
## # weights:  331
## initial  value 5069.853912 
## final  value 4568.389443 
## converged
## # weights:  551
## initial  value 7425.933036 
## final  value 4551.635124 
## converged
## # weights:  111
## initial  value 5433.338943 
## final  value 4569.667984 
## converged
## # weights:  331
## initial  value 4618.390858 
## final  value 4569.667984 
## converged
## # weights:  551
## initial  value 6640.766715 
## final  value 4527.005257 
## converged
## # weights:  111
## initial  value 4854.557834 
## iter  10 value 4569.716558
## final  value 4569.714471 
## converged
## # weights:  331
## initial  value 5574.629260 
## iter  10 value 4305.357985
## iter  20 value 4271.507367
## iter  30 value 4246.195947
## iter  40 value 4199.713445
## iter  50 value 4070.178676
## iter  60 value 3969.948565
## iter  70 value 3904.249891
## iter  80 value 3887.178709
## iter  90 value 3869.052215
## iter 100 value 3853.483271
## final  value 3853.483271 
## stopped after 100 iterations
## # weights:  551
## initial  value 5103.624486 
## iter  10 value 4362.400626
## iter  20 value 4247.699437
## iter  30 value 4244.516902
## iter  40 value 4239.470383
## iter  50 value 4235.615855
## iter  60 value 4230.427499
## iter  70 value 4227.536574
## iter  80 value 4192.811040
## iter  90 value 4146.522688
## iter 100 value 4078.415094
## final  value 4078.415094 
## stopped after 100 iterations
## # weights:  111
## initial  value 4607.420878 
## final  value 4569.669911 
## converged
## # weights:  331
## initial  value 7367.058986 
## final  value 4540.006561 
## converged
## # weights:  551
## initial  value 4576.125759 
## final  value 4555.511492 
## converged
## # weights:  331
## initial  value 6861.407035 
## iter  10 value 6854.061122
## iter  20 value 6854.050875
## iter  20 value 6854.050852
## iter  20 value 6854.050850
## final  value 6854.050850 
## converged
# Print the caret training summary for the node-3 TDA-KDE nnet model:
# CV accuracy/kappa over the size/decay tuning grid and the selected model.
Adult_TDA_KDE_5.50.5_n3_NN1Fit0
## Neural Network 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7757, 7755, 7756 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa      
##   1     0e+00  0.7253736  0.007432493
##   1     1e-04  0.7266638  0.014618712
##   1     1e-01  0.7362011  0.066919109
##   3     0e+00  0.7239986  0.000000000
##   3     1e-04  0.7259759  0.010321338
##   3     1e-01  0.7924245  0.373825954
##   5     0e+00  0.7251160  0.005833098
##   5     1e-04  0.7398181  0.078811780
##   5     1e-01  0.7862277  0.369429172
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Per-fold resampling results for the selected model (size = 3, decay = 0.1)
Adult_TDA_KDE_5.50.5_n3_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8279598 0.5891119    Fold1
## 2 0.7615365 0.2138898    Fold2
## 3 0.7877772 0.3184761    Fold3
# Keep only the Accuracy column ([1] returns a one-column data.frame) for the
# Bayesian baseline-vs-TDA comparisons below
ad_tda_kde_5.50.5_n3_nn1_fit_re<-Adult_TDA_KDE_5.50.5_n3_NN1Fit0$resample[1]

# Inspect the fitted network's weights; the output below shows the final model's
# weights are all ~0, i.e. the refit on the full training set collapsed
summary(Adult_TDA_KDE_5.50.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o 
## -0.32 -0.32  0.00 -0.32
# Predict outcome using Adult_TDA_KDE_5.50.5_n3_NN1Fit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data.
# Reference labels are the income class (adult_df1) of the one-hot test set;
# the positive class is " <=50K" (see the printed statistics below).
ad_tda_kde_5.50.5_n3_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n3_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the confusion-matrix print a few lines above --
# identical output; harmless but likely an editing leftover that could be removed.
ad_tda_kde_5.50.5_n3_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall test-set statistics. Note the degenerate fit: Kappa = 0 and accuracy
# equal to the no-information rate -- the model predicts the majority class
# (" <=50K") for every test observation.
ad_tda_kde_5.50.5_n3_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Save the scalar test-set accuracy (overall[1]) for the test-set diff below
ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc<-ad_tda_kde_5.50.5_n3_nn1_cf0$overall[1]
# Per-class statistics: Specificity 0 and NaN Neg Pred Value confirm that
# " >50K" is never predicted
ad_tda_kde_5.50.5_n3_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Elements 5:7 of byClass are Precision, Recall, and F1
ad_tda_kde_5.50.5_n3_nn1_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n3_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
###### (comment previously said "RF"; the objects compared here are the nnet fits, per the *_nn1_* names)

### 3-fold diff

# Per-fold accuracy difference: baseline NN resamples minus TDA-assisted NN resamples
diff_tda_kde_5.50.5_nn1_n3_3_fold<-(ad_nn1_fit_re - ad_tda_kde_5.50.5_n3_nn1_fit_re)
diff_tda_kde_5.50.5_nn1_n3_3_fold
##      Accuracy
## 1 -0.03541007
## 2  0.04828189
## 3  0.00848497
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE of [-0.01, 0.01]: accuracy differences within one percentage point are
# treated as practically equivalent
bst_tda_kde_5.50.5_nn1.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n3_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

# Odds that the baseline beats the TDA-assisted model (P(left) / P(right))
bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_nn1.n3_3_fold$probLeft/bst_tda_kde_5.50.5_nn1.n3_3_fold$probRight
bst_tda_kde_5.50.5_nn1.n3_3_fold_odds.left
## [1] 1
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nn1.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n3_3_fold
## $winLeft
## [1] 0.3455333
## 
## $winRope
## [1] 0.3024
## 
## $winRight
## [1] 0.3520667
# Bayesian Correlated Test

# The 0.1 is presumably the fold-overlap correlation parameter (rho) of the
# correlated t-test -- TODO confirm against correlatedBayesianTtest's definition
# earlier in this document
bct_tda_kde_5.50.5_nn1.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n3_3_fold
## $left
## [1] 0.3010407
## 
## $rope
## [1] 0.2353607
## 
## $right
## [1] 0.4635985
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nn1_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
#bf_tda_kde_5.50.5_nn1.n3_3_fold

#t_test
# Frequentist one-sample t-test on the per-fold differences (n = 3 folds)
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n3_3_fold)
## t = 0.29454, df = 2, p-value = 0.7961
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.09687379  0.11111165
## sample estimates:
##   mean of x 
## 0.007118931
### Test set diff
# Single test-set accuracy difference.
# NOTE(review): this baseline is svm_cf_ov_acc (SVM accuracy) while the 3-fold diff above
# used ad_nn1_fit_re (NN resamples) -- confirm the asymmetric baselines are intended
diff_tda_kde_5.50.5_nn1.n3_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n3_test
##   Accuracy 
## 0.09183047
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on a single observation; ROPE [-0.01, 0.01]
bst_tda_kde_5.50.5_nn1.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# Ratio P(left)/P(right) from the test-set sign test
bst_tda_kde_5.50.5_nn1.n3_test_odds.left<-bst_tda_kde_5.50.5_nn1.n3_test$probLeft/bst_tda_kde_5.50.5_nn1.n3_test$probRight
bst_tda_kde_5.50.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank version on the single test-set difference; ROPE [-0.01, 0.01]
bsr_tda_kde_5.50.5_nn1.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1610333
## 
## $winRight
## [1] 0.8389667
# Bayesian Correlated Test

# Recorded output below is all NA -- presumably because only a single difference is
# supplied to the correlated t-test; confirm whether this call is meaningful here
bct_tda_kde_5.50.5_nn1.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n3_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n3_test)) #bf_tda_pca_5.50.5_nn1.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n3_test))

##Node4

#Neural Network 1
# Fit a caret nnet classifier on the Node4 TDA-KDE mapper feature vectors,
# resampling via fitControl (3-fold CV per the recorded output) and selecting by Accuracy
Adult_TDA_KDE_5.50.5_n4_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n4.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 5066.467343 
## iter  10 value 3207.383817
## final  value 3207.381382 
## converged
## # weights:  331
## initial  value 4589.421276 
## iter  10 value 3422.946660
## final  value 3422.939877 
## converged
## # weights:  551
## initial  value 4604.489331 
## iter  10 value 3422.965000
## final  value 3422.939875 
## converged
## # weights:  111
## initial  value 4140.704516 
## iter  10 value 3430.949158
## iter  20 value 3426.041741
## iter  30 value 3396.476475
## iter  40 value 3200.964362
## iter  50 value 3186.212937
## iter  60 value 3181.558674
## iter  70 value 3173.016967
## iter  80 value 3172.808814
## iter  90 value 3169.732589
## iter 100 value 3168.308596
## final  value 3168.308596 
## stopped after 100 iterations
## # weights:  331
## initial  value 9119.712345 
## iter  10 value 3430.820735
## final  value 3430.816255 
## converged
## # weights:  551
## initial  value 4762.434901 
## iter  10 value 3408.502221
## iter  20 value 3399.148089
## iter  30 value 3199.151884
## iter  40 value 3183.265668
## iter  50 value 3162.873674
## iter  60 value 3158.124753
## iter  70 value 3154.837865
## iter  80 value 3150.864466
## iter  90 value 3147.284386
## iter 100 value 3090.083991
## final  value 3090.083991 
## stopped after 100 iterations
## # weights:  111
## initial  value 5110.863447 
## final  value 3430.773967 
## converged
## # weights:  331
## initial  value 6727.564802 
## final  value 3430.777753 
## converged
## # weights:  551
## initial  value 3618.793346 
## final  value 3405.685457 
## converged
## # weights:  111
## initial  value 5063.069780 
## final  value 3432.571375 
## converged
## # weights:  331
## initial  value 5940.993863 
## iter  10 value 3431.024651
## final  value 3431.006520 
## converged
## # weights:  551
## initial  value 5062.121491 
## final  value 3432.571375 
## converged
## # weights:  111
## initial  value 5451.668315 
## iter  10 value 3432.749492
## iter  20 value 3338.812356
## iter  30 value 3224.438636
## iter  40 value 3224.231135
## iter  50 value 3224.119819
## iter  60 value 3223.737278
## iter  70 value 3208.759746
## iter  80 value 3172.877134
## iter  90 value 3158.238246
## iter 100 value 3122.697750
## final  value 3122.697750 
## stopped after 100 iterations
## # weights:  331
## initial  value 4731.073515 
## iter  10 value 3432.668497
## iter  20 value 3432.543345
## iter  30 value 3235.898568
## iter  40 value 3194.175044
## iter  50 value 3182.872083
## iter  60 value 3177.143853
## iter  70 value 3173.572455
## iter  80 value 3173.106229
## iter  90 value 3169.662369
## iter 100 value 3134.153695
## final  value 3134.153695 
## stopped after 100 iterations
## # weights:  551
## initial  value 7808.738296 
## iter  10 value 3412.016828
## iter  20 value 3224.941077
## iter  30 value 3146.632095
## iter  40 value 3130.870233
## iter  50 value 3130.351973
## iter  60 value 3100.803910
## iter  70 value 3061.853786
## iter  80 value 3054.181436
## iter  90 value 3015.615356
## iter 100 value 2807.827133
## final  value 2807.827133 
## stopped after 100 iterations
## # weights:  111
## initial  value 4043.381651 
## final  value 3409.054784 
## converged
## # weights:  331
## initial  value 5343.940186 
## final  value 3432.576561 
## converged
## # weights:  551
## initial  value 4851.304525 
## final  value 3432.580514 
## converged
## # weights:  111
## initial  value 3707.648048 
## final  value 3431.006503 
## converged
## # weights:  331
## initial  value 4888.830458 
## final  value 3431.006503 
## converged
## # weights:  551
## initial  value 3814.909474 
## final  value 3431.006503 
## converged
## # weights:  111
## initial  value 3742.709716 
## iter  10 value 3412.809404
## iter  20 value 3216.662395
## iter  30 value 3152.064770
## iter  40 value 3138.103881
## iter  50 value 3115.815351
## iter  60 value 3101.867654
## iter  70 value 2823.325356
## iter  80 value 2459.339549
## iter  90 value 2355.449337
## iter 100 value 2328.855864
## final  value 2328.855864 
## stopped after 100 iterations
## # weights:  331
## initial  value 4322.143356 
## iter  10 value 3431.051089
## final  value 3431.050778 
## converged
## # weights:  551
## initial  value 4279.488960 
## iter  10 value 3410.174590
## iter  20 value 3257.181604
## iter  30 value 3220.719863
## iter  40 value 3183.367976
## iter  50 value 3165.261010
## iter  60 value 3151.060531
## iter  70 value 3141.396913
## iter  80 value 3116.940247
## iter  90 value 2914.767809
## iter 100 value 2583.535935
## final  value 2583.535935 
## stopped after 100 iterations
## # weights:  111
## initial  value 5797.194391 
## final  value 3431.008492 
## converged
## # weights:  331
## initial  value 4600.694454 
## final  value 3399.615250 
## converged
## # weights:  551
## initial  value 5714.728090 
## final  value 3431.015688 
## converged
## # weights:  111
## initial  value 6557.307537 
## iter  10 value 5147.351844
## iter  20 value 4802.787929
## iter  30 value 4792.101791
## iter  40 value 4790.270988
## final  value 4790.225871 
## converged
# Print the caret training summary (tuning grid and chosen size/decay)
Adult_TDA_KDE_5.50.5_n4_NN1Fit0
## Neural Network 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6691, 6693, 6692 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa       
##   1     0e+00  0.7953762  0.0614239378
##   1     1e-04  0.7915922  0.0052504822
##   1     1e-01  0.8285525  0.3076400422
##   3     0e+00  0.7910939  0.0015063855
##   3     1e-04  0.7923889  0.0116859107
##   3     1e-01  0.8009594  0.0928249678
##   5     0e+00  0.7909942  0.0007525722
##   5     1e-04  0.7914922  0.0044967642
##   5     1e-01  0.8255648  0.3089940717
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 1 and decay = 0.1.
# Per-fold resampled Accuracy/Kappa for the selected Node4 model
Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8117717 0.1828468    Fold1
## 2 0.8215247 0.2211546    Fold2
## 3 0.8523610 0.5189187    Fold3
# Keep only the Accuracy column for the fold-wise comparisons below
ad_tda_kde_5.50.5_n4_nn1_fit_re<-Adult_TDA_KDE_5.50.5_n4_NN1Fit0$resample[1]

# Print the fitted network's weights
summary(Adult_TDA_KDE_5.50.5_n4_NN1Fit0)
## a 108-1-1 network with 111 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##    -0.01    -0.14     0.00     0.00     0.00     0.00    -0.01     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00     0.02     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.05     0.00    -0.02 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00    -0.04     0.04     0.07     0.00    -0.08 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00    -0.02 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.05     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##    -0.04     0.00     0.00    -0.08     0.07     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00    -0.04     0.00     0.03     0.00    -0.01 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##    -0.62    -0.43     0.17     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00    -0.01     0.00     0.00 
##  b->o h1->o 
##  1.57 -3.06
# Predict outcome using Adult_TDA_KDE_5.50.5_n4_NN1Fit0 from training data based on testing data
# NOTE: pred0 is reused (overwritten) for each node's model in this document
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n4_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n4_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7337  1915
##      >50K      79   437
##                                           
##                Accuracy : 0.7959          
##                  95% CI : (0.7877, 0.8038)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2388          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9893          
##             Specificity : 0.1858          
##          Pos Pred Value : 0.7930          
##          Neg Pred Value : 0.8469          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7511          
##    Detection Prevalence : 0.9472          
##       Balanced Accuracy : 0.5876          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print above -- same object printed twice; redundant
ad_tda_kde_5.50.5_n4_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7337  1915
##      >50K      79   437
##                                           
##                Accuracy : 0.7959          
##                  95% CI : (0.7877, 0.8038)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2388          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9893          
##             Specificity : 0.1858          
##          Pos Pred Value : 0.7930          
##          Neg Pred Value : 0.8469          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7511          
##    Detection Prevalence : 0.9472          
##       Balanced Accuracy : 0.5876          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics (Accuracy, Kappa, CIs, NIR, p-values)
ad_tda_kde_5.50.5_n4_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.958640e-01   2.387885e-01   7.877319e-01   8.038194e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   3.442541e-18   0.000000e+00
# Keep overall accuracy (element 1) for the test-set comparisons below
ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc<-ad_tda_kde_5.50.5_n4_nn1_cf0$overall[1]
# Per-class statistics
ad_tda_kde_5.50.5_n4_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9893474            0.1857993            0.7930177 
##       Neg Pred Value            Precision               Recall 
##            0.8468992            0.7930177            0.9893474 
##                   F1           Prevalence       Detection Rate 
##            0.8803696            0.7592138            0.7511261 
## Detection Prevalence    Balanced Accuracy 
##            0.9471744            0.5875733
# Keep precision/recall/F1 (byClass elements 5:7)
ad_tda_kde_5.50.5_n4_nn1_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n4_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted vs. tda-assisted classifiers
###### NOTE(review): header previously said "RF", but ad_nn1_fit_re appears to be the
###### neural-net baseline's resamples -- confirm which classifier family is compared here

### 3-fold diff

# Per-fold accuracy difference: baseline NN1 resamples minus Node4 TDA-KDE NN1 resamples
diff_tda_kde_5.50.5_nn1_n4_3_fold<-(ad_nn1_fit_re - ad_tda_kde_5.50.5_n4_nn1_fit_re)
diff_tda_kde_5.50.5_nn1_n4_3_fold
##      Accuracy
## 1 -0.01922205
## 2 -0.01170629
## 3 -0.05609885
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01]
bst_tda_kde_5.50.5_nn1.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Ratio P(left)/P(right) from the sign test above (Inf when probRight is 0)
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_nn1.n4_3_fold$probLeft/bst_tda_kde_5.50.5_nn1.n4_3_fold$probRight
bst_tda_kde_5.50.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Same ROPE [-0.01, 0.01] as the sign test
bsr_tda_kde_5.50.5_nn1.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9086
## 
## $winRope
## [1] 0.0914
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# correlatedBayesianTtest(diffs, 0.1, -0.01, 0.01); 0.1 is presumably the fold-overlap
# correlation parameter -- confirm against the function's documentation
bct_tda_kde_5.50.5_nn1.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n4_3_fold
## $left
## [1] 0.8235127
## 
## $rope
## [1] 0.1100822
## 
## $right
## [1] 0.06640507
# Rope Plot
# Visualize posterior mass inside the ROPE [-0.01, 0.01] (bayestestR::rope)
plot(rope(diff_tda_kde_5.50.5_nn1_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold))
#bf_tda_kde_5.50.5_nn1.n4_3_fold

#t_test
# Frequentist one-sample t-test of the per-fold differences (3 folds)
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n4_3_fold)
## t = -2.1147, df = 2, p-value = 0.1687
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.08803095  0.03001283
## sample estimates:
##   mean of x 
## -0.02900906
### Test set diff
# Single test-set accuracy difference.
# NOTE(review): baseline is svm_cf_ov_acc (SVM accuracy) while the 3-fold diff above
# used ad_nn1_fit_re (NN resamples) -- confirm the asymmetric baselines are intended
diff_tda_kde_5.50.5_nn1.n4_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n4_test
##   Accuracy 
## 0.05518018
## Bayesian Tests Test set diff

# Bayesian Sign Test

# Sign test on a single observation; ROPE [-0.01, 0.01]
bst_tda_kde_5.50.5_nn1.n4_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# Ratio P(left)/P(right) from the test-set sign test
bst_tda_kde_5.50.5_nn1.n4_test_odds.left<-bst_tda_kde_5.50.5_nn1.n4_test$probLeft/bst_tda_kde_5.50.5_nn1.n4_test$probRight
bst_tda_kde_5.50.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# Signed-rank version on the single test-set difference; ROPE [-0.01, 0.01]
bsr_tda_kde_5.50.5_nn1.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1585667
## 
## $winRight
## [1] 0.8414333
# Bayesian Correlated Test

# Recorded output below is all NA -- presumably because only a single difference is
# supplied to the correlated t-test; confirm whether this call is meaningful here
bct_tda_kde_5.50.5_nn1.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nn1.n4_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n4_test)) #bf_tda_pca_5.50.5_nn1.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n4_test))

##Node5

#Neural Network 1

# Fit a caret nnet classifier on the Node5 TDA-KDE mapper feature vectors,
# resampling via fitControl (3-fold CV per the recorded output) and selecting by Accuracy
Adult_TDA_KDE_5.50.5_n5_NN1Fit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n5.vec, 
                       method = 'nnet', 
                          trControl = fitControl,
                       metric='Accuracy')
## # weights:  111
## initial  value 6024.442869 
## iter  10 value 2140.512894
## iter  20 value 2140.490647
## iter  20 value 2140.490635
## iter  20 value 2140.490635
## final  value 2140.490635 
## converged
## # weights:  331
## initial  value 3086.504343 
## iter  10 value 2146.142505
## final  value 2146.131718 
## converged
## # weights:  551
## initial  value 2727.185121 
## iter  10 value 2151.768635
## final  value 2151.762840 
## converged
## # weights:  111
## initial  value 3877.307464 
## iter  10 value 2137.874465
## iter  20 value 1977.479059
## iter  30 value 1969.401851
## iter  40 value 1927.026389
## iter  50 value 1863.233262
## iter  60 value 1606.914263
## iter  70 value 1509.211080
## iter  80 value 1482.458248
## iter  90 value 1405.750368
## iter 100 value 1376.776999
## final  value 1376.776999 
## stopped after 100 iterations
## # weights:  331
## initial  value 2779.128836 
## iter  10 value 2157.493395
## iter  20 value 2157.286710
## iter  30 value 2099.834733
## iter  40 value 1974.343314
## iter  50 value 1966.845044
## iter  60 value 1878.786050
## iter  70 value 1667.380673
## iter  80 value 1614.252845
## iter  90 value 1526.975113
## iter 100 value 1435.572131
## final  value 1435.572131 
## stopped after 100 iterations
## # weights:  551
## initial  value 2956.075235 
## iter  10 value 2157.628334
## iter  20 value 2157.461267
## iter  30 value 2157.456882
## final  value 2157.456779 
## converged
## # weights:  111
## initial  value 3835.842884 
## final  value 2157.386397 
## converged
## # weights:  331
## initial  value 2601.056410 
## final  value 2157.389529 
## converged
## # weights:  551
## initial  value 4972.003011 
## final  value 2123.545049 
## converged
## # weights:  111
## initial  value 4457.433858 
## final  value 2159.255618 
## converged
## # weights:  331
## initial  value 4158.814355 
## final  value 2159.255618 
## converged
## # weights:  551
## initial  value 4203.253305 
## final  value 2159.255618 
## converged
## # weights:  111
## initial  value 2636.813222 
## iter  10 value 2159.546013
## iter  20 value 2060.795889
## iter  30 value 2021.380054
## iter  40 value 2020.534368
## iter  50 value 2020.030012
## iter  60 value 2020.001986
## iter  70 value 2017.782202
## iter  80 value 2017.516072
## final  value 2017.515012 
## converged
## # weights:  331
## initial  value 3113.068073 
## iter  10 value 2078.119028
## iter  20 value 2064.624735
## iter  30 value 2053.964814
## iter  40 value 2036.484947
## final  value 2029.322434 
## converged
## # weights:  551
## initial  value 3125.919689 
## iter  10 value 2159.404998
## iter  20 value 2159.352977
## iter  30 value 2159.334586
## iter  40 value 2094.929465
## iter  50 value 2093.737328
## iter  60 value 2093.728419
## iter  70 value 2042.738904
## iter  80 value 2002.689494
## iter  90 value 1977.244810
## iter 100 value 1610.743237
## final  value 1610.743237 
## stopped after 100 iterations
## # weights:  111
## initial  value 2661.958823 
## final  value 2159.257846 
## converged
## # weights:  331
## initial  value 2926.039559 
## iter  10 value 2151.781846
## final  value 2151.777760 
## converged
## # weights:  551
## initial  value 3740.689754 
## final  value 2134.862943 
## converged
## # weights:  111
## initial  value 4091.635980 
## final  value 2157.551046 
## converged
## # weights:  331
## initial  value 2971.792100 
## iter  10 value 2146.311494
## final  value 2146.297500 
## converged
## # weights:  551
## initial  value 2916.303934 
## final  value 2157.551046 
## converged
## # weights:  111
## initial  value 2965.046366 
## iter  10 value 2157.701646
## iter  20 value 2147.509693
## iter  30 value 2143.577754
## iter  40 value 2014.763529
## iter  50 value 1997.233090
## iter  60 value 1923.714143
## iter  70 value 1723.130118
## iter  80 value 1594.666841
## iter  90 value 1458.344261
## iter 100 value 1407.563585
## final  value 1407.563585 
## stopped after 100 iterations
## # weights:  331
## initial  value 5771.291017 
## iter  10 value 2156.663460
## iter  20 value 2156.638363
## iter  30 value 2036.268031
## iter  40 value 2009.479547
## iter  50 value 2008.340813
## final  value 2008.335502 
## converged
## # weights:  551
## initial  value 2896.863885 
## iter  10 value 2143.949240
## iter  20 value 1973.284550
## iter  30 value 1855.767397
## iter  40 value 1664.983166
## iter  50 value 1646.156925
## iter  60 value 1518.832759
## iter  70 value 1370.827030
## iter  80 value 1315.614761
## iter  90 value 1312.083106
## iter 100 value 1294.181766
## final  value 1294.181766 
## stopped after 100 iterations
## # weights:  111
## initial  value 2613.025544 
## final  value 2157.553164 
## converged
## # weights:  331
## initial  value 3864.565883 
## final  value 2157.556945 
## converged
## # weights:  551
## initial  value 2576.285505 
## final  value 2146.312864 
## converged
## # weights:  331
## initial  value 5647.191897 
## iter  10 value 3216.569710
## iter  20 value 3128.387791
## iter  30 value 2770.485167
## iter  40 value 2676.748932
## iter  50 value 2648.168974
## iter  60 value 2631.194645
## iter  70 value 2357.243610
## iter  80 value 2235.738191
## iter  90 value 2165.517927
## iter 100 value 2101.027110
## final  value 2101.027110 
## stopped after 100 iterations
# Print the caret training summary (tuning grid and chosen size/decay)
Adult_TDA_KDE_5.50.5_n5_NN1Fit0
## Neural Network 
## 
## 7540 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5026, 5027, 5027 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa      
##   1     0e+00  0.8466842  0.005788476
##   1     1e-04  0.8461539  0.000000000
##   1     1e-01  0.8591505  0.315685954
##   3     0e+00  0.8477454  0.017282973
##   3     1e-04  0.8462865  0.001459210
##   3     1e-01  0.8623348  0.219213678
##   5     0e+00  0.8462864  0.001454869
##   5     1e-04  0.8485410  0.025902999
##   5     1e-01  0.8550410  0.132691908
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.1.
# Per-fold resampled Accuracy/Kappa for the selected Node5 model
Adult_TDA_KDE_5.50.5_n5_NN1Fit0$resample
##    Accuracy     Kappa Resample
## 1 0.8579952 0.2337447    Fold1
## 2 0.8651015 0.2006675    Fold2
## 3 0.8639077 0.2232289    Fold3
# Keep only the Accuracy column for the fold-wise comparisons below
ad_tda_kde_5.50.5_n5_nn1_fit_re<-Adult_TDA_KDE_5.50.5_n5_NN1Fit0$resample[1]

# Print the fitted network's weights
summary(Adult_TDA_KDE_5.50.5_n5_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting  decay=0.1
##    b->h1   i1->h1   i2->h1   i3->h1   i4->h1   i5->h1   i6->h1   i7->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h1   i9->h1  i10->h1  i11->h1  i12->h1  i13->h1  i14->h1  i15->h1 
##     0.00     0.00     0.00    -0.01     0.00     0.00     0.00     0.00 
##  i16->h1  i17->h1  i18->h1  i19->h1  i20->h1  i21->h1  i22->h1  i23->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h1  i25->h1  i26->h1  i27->h1  i28->h1  i29->h1  i30->h1  i31->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h1  i33->h1  i34->h1  i35->h1  i36->h1  i37->h1  i38->h1  i39->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h1  i41->h1  i42->h1  i43->h1  i44->h1  i45->h1  i46->h1  i47->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h1  i49->h1  i50->h1  i51->h1  i52->h1  i53->h1  i54->h1  i55->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h1  i57->h1  i58->h1  i59->h1  i60->h1  i61->h1  i62->h1  i63->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h1  i65->h1  i66->h1  i67->h1  i68->h1  i69->h1  i70->h1  i71->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h1  i73->h1  i74->h1  i75->h1  i76->h1  i77->h1  i78->h1  i79->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h1  i81->h1  i82->h1  i83->h1  i84->h1  i85->h1  i86->h1  i87->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h1  i89->h1  i90->h1  i91->h1  i92->h1  i93->h1  i94->h1  i95->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h1  i97->h1  i98->h1  i99->h1 i100->h1 i101->h1 i102->h1 i103->h1 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1 
##     0.00     0.00     0.00     0.00     0.00 
##    b->h2   i1->h2   i2->h2   i3->h2   i4->h2   i5->h2   i6->h2   i7->h2 
##     0.31    -0.09     0.31    -0.82    -0.04     0.45    -0.14    -0.22 
##   i8->h2   i9->h2  i10->h2  i11->h2  i12->h2  i13->h2  i14->h2  i15->h2 
##     0.33     0.13     0.30     0.00     0.00     0.00     0.00     0.00 
##  i16->h2  i17->h2  i18->h2  i19->h2  i20->h2  i21->h2  i22->h2  i23->h2 
##     0.00     0.00     0.00     0.00    -0.93     0.00     0.00     1.36 
##  i24->h2  i25->h2  i26->h2  i27->h2  i28->h2  i29->h2  i30->h2  i31->h2 
##     0.00     0.00     0.00    -0.11     0.85     1.10    -2.51    -2.03 
##  i32->h2  i33->h2  i34->h2  i35->h2  i36->h2  i37->h2  i38->h2  i39->h2 
##     0.99     1.37     1.38     0.00     0.76    -0.22     0.57     0.06 
##  i40->h2  i41->h2  i42->h2  i43->h2  i44->h2  i45->h2  i46->h2  i47->h2 
##    -0.70     0.77     0.41     0.16     0.63     0.60    -1.51    -0.48 
##  i48->h2  i49->h2  i50->h2  i51->h2  i52->h2  i53->h2  i54->h2  i55->h2 
##    -0.08    -1.11     0.45    -0.44    -1.33     0.75     4.17    -0.77 
##  i56->h2  i57->h2  i58->h2  i59->h2  i60->h2  i61->h2  i62->h2  i63->h2 
##    -2.06     1.27    -1.57    -0.57     2.26    -1.08     0.54    -0.23 
##  i64->h2  i65->h2  i66->h2  i67->h2  i68->h2  i69->h2  i70->h2  i71->h2 
##     0.00     0.00    -0.04    -0.49     0.74    -0.51    -2.04     1.10 
##  i72->h2  i73->h2  i74->h2  i75->h2  i76->h2  i77->h2  i78->h2  i79->h2 
##    -1.46     1.53     0.31     1.51    -0.27     0.03    -1.21     0.56 
##  i80->h2  i81->h2  i82->h2  i83->h2  i84->h2  i85->h2  i86->h2  i87->h2 
##    -1.11     0.08     0.00     0.13     1.01     0.16     0.74    -0.71 
##  i88->h2  i89->h2  i90->h2  i91->h2  i92->h2  i93->h2  i94->h2  i95->h2 
##    -0.82     1.33    -2.19     1.81     0.93    -0.41     1.20     0.20 
##  i96->h2  i97->h2  i98->h2  i99->h2 i100->h2 i101->h2 i102->h2 i103->h2 
##     0.71    -1.97    -0.80    -0.17     1.16     0.53    -0.43     0.42 
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2 
##    -0.17     1.16    -0.86     0.51    -1.94 
##    b->h3   i1->h3   i2->h3   i3->h3   i4->h3   i5->h3   i6->h3   i7->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##   i8->h3   i9->h3  i10->h3  i11->h3  i12->h3  i13->h3  i14->h3  i15->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i16->h3  i17->h3  i18->h3  i19->h3  i20->h3  i21->h3  i22->h3  i23->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i24->h3  i25->h3  i26->h3  i27->h3  i28->h3  i29->h3  i30->h3  i31->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i32->h3  i33->h3  i34->h3  i35->h3  i36->h3  i37->h3  i38->h3  i39->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i40->h3  i41->h3  i42->h3  i43->h3  i44->h3  i45->h3  i46->h3  i47->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i48->h3  i49->h3  i50->h3  i51->h3  i52->h3  i53->h3  i54->h3  i55->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i56->h3  i57->h3  i58->h3  i59->h3  i60->h3  i61->h3  i62->h3  i63->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i64->h3  i65->h3  i66->h3  i67->h3  i68->h3  i69->h3  i70->h3  i71->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i72->h3  i73->h3  i74->h3  i75->h3  i76->h3  i77->h3  i78->h3  i79->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i80->h3  i81->h3  i82->h3  i83->h3  i84->h3  i85->h3  i86->h3  i87->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i88->h3  i89->h3  i90->h3  i91->h3  i92->h3  i93->h3  i94->h3  i95->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
##  i96->h3  i97->h3  i98->h3  i99->h3 i100->h3 i101->h3 i102->h3 i103->h3 
##     0.00     0.00     0.00     0.00     0.00     0.00     0.00     0.00 
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3 
##     0.00     0.00     0.00     0.00     0.00 
##  b->o h1->o h2->o h3->o 
##  0.17  0.00 -5.48  0.22
# Predict outcome using Adult_TDA_KDE_5.50.5_n5_NN1Fit0 from training data based on testing data
# NOTE: pred0 is reused (overwritten) for each node's model in this document
pred0 <- predict(Adult_TDA_KDE_5.50.5_n5_NN1Fit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n5_nn1_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n5_nn1_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6374  1460
##      >50K    1042   892
##                                           
##                Accuracy : 0.7439          
##                  95% CI : (0.7351, 0.7525)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9998          
##                                           
##                   Kappa : 0.2542          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8595          
##             Specificity : 0.3793          
##          Pos Pred Value : 0.8136          
##          Neg Pred Value : 0.4612          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6525          
##    Detection Prevalence : 0.8020          
##       Balanced Accuracy : 0.6194          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print above -- same object printed twice; redundant
ad_tda_kde_5.50.5_n5_nn1_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6374  1460
##      >50K    1042   892
##                                           
##                Accuracy : 0.7439          
##                  95% CI : (0.7351, 0.7525)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.9998          
##                                           
##                   Kappa : 0.2542          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.8595          
##             Specificity : 0.3793          
##          Pos Pred Value : 0.8136          
##          Neg Pred Value : 0.4612          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6525          
##    Detection Prevalence : 0.8020          
##       Balanced Accuracy : 0.6194          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics (Accuracy, Kappa, CIs, NIR, p-values)
ad_tda_kde_5.50.5_n5_nn1_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.438575e-01   2.541671e-01   7.350781e-01   7.524912e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.997987e-01   7.641451e-17
# Keep overall accuracy (element 1) for the test-set comparisons below
ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc<-ad_tda_kde_5.50.5_n5_nn1_cf0$overall[1]
# Per-class statistics
ad_tda_kde_5.50.5_n5_nn1_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8594930            0.3792517            0.8136329 
##       Neg Pred Value            Precision               Recall 
##            0.4612203            0.8136329            0.8594930 
##                   F1           Prevalence       Detection Rate 
##            0.8359344            0.7592138            0.6525389 
## Detection Prevalence    Balanced Accuracy 
##            0.8020066            0.6193723
# Keep precision/recall/F1 (byClass elements 5:7)
ad_tda_kde_5.50.5_n5_nn1_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n5_nn1_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted vs. tda-assisted classifiers
###### NOTE(review): header previously said "RF", but ad_nn1_fit_re appears to be the
###### neural-net baseline's resamples -- confirm which classifier family is compared here

### 3-fold diff

# Per-fold accuracy difference: baseline NN1 resamples minus Node5 TDA-KDE NN1 resamples
diff_tda_kde_5.50.5_nn1_n5_3_fold<-(ad_nn1_fit_re - ad_tda_kde_5.50.5_n5_nn1_fit_re)
diff_tda_kde_5.50.5_nn1_n5_3_fold
##      Accuracy
## 1 -0.06544554
## 2 -0.05528310
## 3 -0.06764551
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) is [-0.01, 0.01]
bst_tda_kde_5.50.5_nn1.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Ratio P(left)/P(right) from the sign test above (Inf when probRight is 0)
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_nn1.n5_3_fold$probLeft/bst_tda_kde_5.50.5_nn1.n5_3_fold$probRight
bst_tda_kde_5.50.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# Same ROPE [-0.01, 0.01] as the sign test
bsr_tda_kde_5.50.5_nn1.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9913667
## 
## $winRope
## [1] 0.008633333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# correlatedBayesianTtest(diffs, 0.1, -0.01, 0.01); 0.1 is presumably the fold-overlap
# correlation parameter -- confirm against the function's documentation
bct_tda_kde_5.50.5_nn1.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n5_3_fold
## $left
## [1] 0.9965678
## 
## $rope
## [1] 0.001618088
## 
## $right
## [1] 0.001814075
# Rope Plot
# Visualize posterior mass inside the ROPE [-0.01, 0.01] (bayestestR::rope)
plot(rope(diff_tda_kde_5.50.5_nn1_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold))
#bf_tda_kde_5.50.5_nn1.n5_3_fold

#t_test
# Frequentist one-sample t-test of the per-fold differences (3 folds)
t.test(as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nn1_n5_3_fold)
## t = -16.492, df = 2, p-value = 0.003657
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.07917364 -0.04640912
## sample estimates:
##   mean of x 
## -0.06279138
### Test set diff
# Single held-out-test-set accuracy difference (baseline minus TDA-assisted).
# NOTE(review): this section compares RF classifiers, but the baseline used
# here is svm_cf_ov_acc (an SVM accuracy) — confirm this is the intended
# baseline rather than the RF test-set accuracy.
diff_tda_kde_5.50.5_nn1.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nn1_cf0_ov_acc)
diff_tda_kde_5.50.5_nn1.n5_test
##  Accuracy 
## 0.1071867
## Bayesian Tests Test set diff

# Bayesian Sign Test

# With a single observation the sign test can only split mass between rope
# and one side, as the output below shows.
bst_tda_kde_5.50.5_nn1.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_nn1.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

# 0 / 0.5 = 0: no posterior mass on the "baseline worse" side.
bst_tda_kde_5.50.5_nn1.n5_test_odds.left<-bst_tda_kde_5.50.5_nn1.n5_test$probLeft/bst_tda_kde_5.50.5_nn1.n5_test$probRight
bst_tda_kde_5.50.5_nn1.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# FIX: the original called BayesianSignedRank() on
# "diff_tda_kde_5.50.5_nn1.n4_test", an object that is never defined — the
# test-set difference computed above is the "n5" variant — so this line would
# fail with "object not found". Both the input and the result name are
# corrected from n4 to n5, matching the naming used by the surrounding tests.
bsr_tda_kde_5.50.5_nn1.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nn1.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1598667
## 
## $winRight
## [1] 0.8401333
# Bayesian Correlated Test

# NOTE(review): the NA results below presumably arise because the input is a
# single difference (a correlated t-test needs per-fold variance) — confirm
# whether this call is meaningful for a one-element input.
bct_tda_kde_5.50.5_nn1.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nn1.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# (kept disabled; original commented call had unbalanced parentheses and no
# ROPE bounds — corrected here so it can be re-enabled directly)
#plot(rope(diff_tda_kde_5.50.5_nn1.n5_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nn1.n5_test)) #bf_tda_pca_5.50.5_nn1.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nn1.n5_test)) 


##Logistic Regression 

# Baseline logistic regression (caret::train with method = 'glm',
# family = 'binomial') on the one-hot-encoded Adult training set, evaluated
# with the shared fitControl resampling scheme and Accuracy as the metric.
adultLrFit <- train(as.factor(adult_df1) ~ ., 
                 data = adult.one_hot_df4Train, 
                 family = 'binomial',
                method = 'glm', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted model summary (resampled accuracy/kappa).
adultLrFit
## Generalized Linear Model 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15195, 15196, 15195 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8503929  0.5647736
adultLrFit$resample
##    Accuracy     Kappa Resample
## 1 0.8504870 0.5675901    Fold1
## 2 0.8562590 0.5793290    Fold2
## 3 0.8444327 0.5474018    Fold3
# Keep the per-fold Accuracy column for the Bayesian comparisons below.
ad_lr_fit_re<-adultLrFit$resample[1]

# Coefficient table of the final GLM (note: several coefficients are NA due to
# singularities in the one-hot design, per the output below).
summary(adultLrFit)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     1.004e+11  3.373e+12   0.030 0.976258    
## V1                              2.436e-02  1.969e-03  12.371  < 2e-16 ***
## V2..                           -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.Federal.gov                 -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.Local.gov                   -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.Never.worked                -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.Private                     -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.Self.emp.inc                -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.Self.emp.not.inc            -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.State.gov                   -1.004e+11  3.373e+12  -0.030 0.976258    
## V2.Without.pay                 -1.004e+11  3.373e+12  -0.030 0.976258    
## V3                              6.225e-07  2.063e-07   3.017 0.002549 ** 
## V4.10th                        -1.020e+00  1.790e-01  -5.700 1.20e-08 ***
## V4.11th                        -1.030e+00  1.814e-01  -5.677 1.37e-08 ***
## V4.12th                        -8.317e-01  2.708e-01  -3.072 0.002128 ** 
## V4.1st.4th                     -2.383e+00  7.483e-01  -3.184 0.001453 ** 
## V4.5th.6th                     -1.330e+00  3.427e-01  -3.882 0.000104 ***
## V4.7th.8th                     -1.706e+00  2.248e-01  -7.591 3.18e-14 ***
## V4.9th                         -1.220e+00  2.454e-01  -4.972 6.62e-07 ***
## V4.Assoc.acdm                   1.108e-01  1.159e-01   0.956 0.339220    
## V4.Assoc.voc                    2.123e-01  1.031e-01   2.060 0.039387 *  
## V4.Bachelors                    7.471e-01  6.698e-02  11.154  < 2e-16 ***
## V4.Doctorate                    2.044e+00  1.955e-01  10.458  < 2e-16 ***
## V4.HS.grad                     -3.357e-01  6.048e-02  -5.550 2.85e-08 ***
## V4.Masters                      1.234e+00  9.712e-02  12.708  < 2e-16 ***
## V4.Preschool                   -3.280e+01  4.990e+04  -0.001 0.999475    
## V4.Prof.school                  1.941e+00  1.672e-01  11.610  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -2.052e-01  1.867e-01  -1.099 0.271725    
## V6.Married.AF.spouse            2.342e+00  6.905e-01   3.391 0.000695 ***
## V6.Married.civ.spouse           1.906e+00  3.546e-01   5.373 7.73e-08 ***
## V6.Married.spouse.absent       -2.256e-01  3.165e-01  -0.713 0.476062    
## V6.Never.married               -6.900e-01  1.938e-01  -3.560 0.000371 ***
## V6.Separated                   -2.544e-01  2.488e-01  -1.022 0.306548    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 8.686e-02  1.186e-01   0.732 0.464145    
## V7.Armed.Forces                -6.328e-01  1.843e+00  -0.343 0.731268    
## V7.Craft.repair                 2.720e-01  1.006e-01   2.704 0.006854 ** 
## V7.Exec.managerial              9.426e-01  1.036e-01   9.094  < 2e-16 ***
## V7.Farming.fishing             -6.978e-01  1.656e-01  -4.215 2.50e-05 ***
## V7.Handlers.cleaners           -5.443e-01  1.765e-01  -3.084 0.002045 ** 
## V7.Machine.op.inspct           -9.237e-03  1.261e-01  -0.073 0.941622    
## V7.Other.service               -6.240e-01  1.487e-01  -4.196 2.72e-05 ***
## V7.Priv.house.serv             -3.986e+00  1.779e+00  -2.240 0.025085 *  
## V7.Prof.specialty               6.232e-01  1.116e-01   5.583 2.37e-08 ***
## V7.Protective.serv              8.390e-01  1.542e-01   5.443 5.25e-08 ***
## V7.Sales                        4.484e-01  1.073e-01   4.180 2.92e-05 ***
## V7.Tech.support                 7.562e-01  1.423e-01   5.314 1.07e-07 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.391e+00  1.239e-01 -11.234  < 2e-16 ***
## V8.Not.in.family               -9.144e-01  3.262e-01  -2.804 0.005053 ** 
## V8.Other.relative              -1.789e+00  3.050e-01  -5.865 4.48e-09 ***
## V8.Own.child                   -2.218e+00  3.258e-01  -6.809 9.81e-12 ***
## V8.Unmarried                   -1.154e+00  3.389e-01  -3.404 0.000663 ***
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -5.667e-01  2.717e-01  -2.086 0.036987 *  
## V9.Asian.Pac.Islander           2.355e-01  1.888e-01   1.247 0.212284    
## V9.Black                       -1.908e-01  9.215e-02  -2.071 0.038362 *  
## V9.Other                       -5.408e-01  3.570e-01  -1.515 0.129857    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.573e-01  9.640e-02  -8.893  < 2e-16 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.299e-04  1.249e-05  26.427  < 2e-16 ***
## V12                             6.850e-04  4.458e-05  15.367  < 2e-16 ***
## V13                             2.745e-02  1.928e-03  14.238  < 2e-16 ***
## V14..                          -8.561e-01  7.331e-01  -1.168 0.242854    
## V14.Cambodia                    1.024e+00  1.201e+00   0.853 0.393858    
## V14.Canada                     -6.150e-01  7.825e-01  -0.786 0.431884    
## V14.China                      -1.618e+00  8.392e-01  -1.928 0.053845 .  
## V14.Columbia                   -2.853e+00  1.102e+00  -2.590 0.009596 ** 
## V14.Cuba                       -3.366e-01  8.055e-01  -0.418 0.676041    
## V14.Dominican.Republic         -1.558e+01  5.124e+02  -0.030 0.975747    
## V14.Ecuador                    -4.222e-01  1.048e+00  -0.403 0.687012    
## V14.El.Salvador                -1.642e+00  9.100e-01  -1.805 0.071101 .  
## V14.England                    -6.938e-01  8.002e-01  -0.867 0.385949    
## V14.France                      1.155e-01  9.685e-01   0.119 0.905070    
## V14.Germany                    -3.877e-01  7.768e-01  -0.499 0.617685    
## V14.Greece                     -1.868e+00  9.679e-01  -1.930 0.053667 .  
## V14.Guatemala                  -1.259e+00  1.230e+00  -1.024 0.306049    
## V14.Haiti                      -1.756e+00  1.304e+00  -1.347 0.177919    
## V14.Holand.Netherlands         -2.348e+01  3.498e+05   0.000 0.999946    
## V14.Honduras                   -2.044e+00  2.826e+00  -0.723 0.469603    
## V14.Hong                       -1.077e+00  1.156e+00  -0.931 0.351711    
## V14.Hungary                    -2.171e-01  1.174e+00  -0.185 0.853253    
## V14.India                      -1.140e+00  7.971e-01  -1.430 0.152592    
## V14.Iran                       -7.421e-01  8.728e-01  -0.850 0.395173    
## V14.Ireland                     1.776e-01  9.909e-01   0.179 0.857786    
## V14.Italy                      -1.438e-01  8.048e-01  -0.179 0.858227    
## V14.Jamaica                    -4.129e-01  8.575e-01  -0.482 0.630154    
## V14.Japan                      -3.069e-01  8.731e-01  -0.352 0.725187    
## V14.Laos                       -1.260e+00  1.153e+00  -1.093 0.274549    
## V14.Mexico                     -1.222e+00  7.591e-01  -1.610 0.107331    
## V14.Nicaragua                  -1.457e+00  1.075e+00  -1.354 0.175590    
## V14.Outlying.US.Guam.USVI.etc. -2.551e+01  1.176e+05   0.000 0.999827    
## V14.Peru                       -2.316e+00  1.358e+00  -1.706 0.087944 .  
## V14.Philippines                -3.658e-01  7.853e-01  -0.466 0.641382    
## V14.Poland                     -9.308e-01  8.405e-01  -1.107 0.268139    
## V14.Portugal                   -4.015e-01  9.766e-01  -0.411 0.681005    
## V14.Puerto.Rico                -1.043e+00  8.473e-01  -1.231 0.218294    
## V14.Scotland                   -4.331e-01  1.124e+00  -0.385 0.700074    
## V14.South                      -2.195e+00  8.821e-01  -2.489 0.012820 *  
## V14.Taiwan                     -1.294e+00  9.023e-01  -1.434 0.151698    
## V14.Thailand                   -1.359e+00  1.131e+00  -1.202 0.229535    
## V14.Trinadad.Tobago            -7.640e-01  1.152e+00  -0.663 0.507367    
## V14.United.States              -6.141e-01  7.150e-01  -0.859 0.390402    
## V14.Vietnam                    -2.341e+00  1.099e+00  -2.130 0.033147 *  
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 25165  on 22792  degrees of freedom
## Residual deviance: 14314  on 22693  degrees of freedom
## AIC: 14514
## 
## Number of Fisher Scoring iterations: 25
#varImp (adultLrFit)

# Predict outcome using model from training data based on testing data
predictions <- predict(adultLrFit, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
lr_cf<-confusionMatrix(data=predictions, as.factor(adult.one_hot_df4Test$adult_df1))
lr_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6930   964
##      >50K     486  1388
##                                           
##                Accuracy : 0.8516          
##                  95% CI : (0.8443, 0.8586)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5637          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9345          
##             Specificity : 0.5901          
##          Pos Pred Value : 0.8779          
##          Neg Pred Value : 0.7407          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7095          
##    Detection Prevalence : 0.8081          
##       Balanced Accuracy : 0.7623          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics; element 1 (Accuracy) is retained as the baseline LR
# test-set accuracy for the comparisons below.
lr_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.515561e-01   5.637173e-01   8.443495e-01   8.585524e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  2.269119e-112   5.338073e-36
lr_cf_ov_acc<-lr_cf$overall[1]
lr_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9344660            0.5901361            0.8778819 
##       Neg Pred Value            Precision               Recall 
##            0.7406617            0.8778819            0.9344660 
##                   F1           Prevalence       Detection Rate 
##            0.9052907            0.7592138            0.7094595 
## Detection Prevalence    Balanced Accuracy 
##            0.8081491            0.7623010
# Precision, recall, F1 (byClass elements 5:7) for the baseline LR model.
lr_cf_pre_rec_f1<-lr_cf$byClass[5:7]


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1

# FIX: removed a dead preliminary glm() fit whose result was assigned to the
# same name and immediately overwritten by the caret::train() call below; it
# only wasted a full model fit and emitted a spurious warning.
# 3-fold CV logistic regression on the TDA mapper node-1 vector data, mirroring
# the baseline adultLrFit configuration.
Adult_TDA_PC_5.50.5_n1_LrFit0 <- train(as.factor(adult_df1) ~ ., 
                 data = tda.m_adult_5.50.5.n1.vec, 
                 family = 'binomial',
                method = 'glm', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted TDA-node-1 model (note the near-zero Kappa: the node's data
# is heavily dominated by one class, per the output below).
Adult_TDA_PC_5.50.5_n1_LrFit0
## Generalized Linear Model 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3277, 3279, 3278 
## Resampling results:
## 
##   Accuracy   Kappa     
##   0.9723412  0.01106462
Adult_TDA_PC_5.50.5_n1_LrFit0$resample
##    Accuracy        Kappa Resample
## 1 0.9719512 -0.002338435    Fold1
## 2 0.9737485  0.000000000    Fold2
## 3 0.9713240  0.035532296    Fold3
# Keep the per-fold Accuracy column for the Bayesian comparisons below.
ad_tda_pc_5.50.5_n1_lr_fit_re<-Adult_TDA_PC_5.50.5_n1_LrFit0$resample[1]

# Coefficient table of the node-1 GLM. The huge estimates/z-values in the
# output below reflect the complete-separation warnings emitted during fitting.
summary(Adult_TDA_PC_5.50.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (25 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                     6.754e+15  1.035e+08   65242260   <2e-16 ***
## V1                             -4.021e+13  1.001e+05 -401739559   <2e-16 ***
## V2..                            1.555e+15  1.144e+07  135880986   <2e-16 ***
## V2.Federal.gov                 -8.210e+14  6.396e+06 -128368422   <2e-16 ***
## V2.Local.gov                   -3.322e+14  5.648e+06  -58813598   <2e-16 ***
## V2.Never.worked                        NA         NA         NA       NA    
## V2.Private                      2.257e+14  4.636e+06   48689222   <2e-16 ***
## V2.Self.emp.inc                -1.030e+15  5.363e+06 -192126826   <2e-16 ***
## V2.Self.emp.not.inc            -1.764e+15  5.342e+06 -330201149   <2e-16 ***
## V2.State.gov                           NA         NA         NA       NA    
## V2.Without.pay                         NA         NA         NA       NA    
## V3                              1.936e+08  9.807e+00   19741148   <2e-16 ***
## V4.10th                         5.343e+14  1.892e+07   28232875   <2e-16 ***
## V4.11th                         1.643e+15  2.266e+07   72471752   <2e-16 ***
## V4.12th                        -5.781e+14  3.020e+07  -19138755   <2e-16 ***
## V4.1st.4th                      1.762e+15  6.954e+07   25333567   <2e-16 ***
## V4.5th.6th                      1.178e+15  3.942e+07   29884200   <2e-16 ***
## V4.7th.8th                      1.870e+15  1.548e+07  120788873   <2e-16 ***
## V4.9th                          1.995e+15  3.027e+07   65929241   <2e-16 ***
## V4.Assoc.acdm                  -6.403e+14  5.949e+06 -107617549   <2e-16 ***
## V4.Assoc.voc                   -4.419e+14  5.274e+06  -83784354   <2e-16 ***
## V4.Bachelors                   -9.810e+14  3.195e+06 -307070713   <2e-16 ***
## V4.Doctorate                   -2.241e+15  5.430e+06 -412621450   <2e-16 ***
## V4.HS.grad                      1.710e+14  3.533e+06   48411646   <2e-16 ***
## V4.Masters                     -1.096e+15  3.832e+06 -286104624   <2e-16 ***
## V4.Preschool                           NA         NA         NA       NA    
## V4.Prof.school                 -1.241e+15  4.949e+06 -250795563   <2e-16 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                     2.246e+15  6.983e+07   32166187   <2e-16 ***
## V6.Married.AF.spouse            5.415e+15  1.067e+08   50765144   <2e-16 ***
## V6.Married.civ.spouse           3.561e+15  9.547e+07   37296901   <2e-16 ***
## V6.Married.spouse.absent       -3.913e+14  9.503e+07   -4116981   <2e-16 ***
## V6.Never.married               -1.548e+14  7.763e+07   -1993868   <2e-16 ***
## V6.Separated                           NA         NA         NA       NA    
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                -8.947e+13  8.646e+06  -10348260   <2e-16 ***
## V7.Armed.Forces                 1.572e+15  6.759e+07   23257677   <2e-16 ***
## V7.Craft.repair                -3.669e+12  5.511e+06    -665768   <2e-16 ***
## V7.Exec.managerial              3.655e+14  5.294e+06   69045202   <2e-16 ***
## V7.Farming.fishing             -5.738e+14  8.252e+06  -69535707   <2e-16 ***
## V7.Handlers.cleaners            5.386e+14  1.867e+07   28841951   <2e-16 ***
## V7.Machine.op.inspct            7.351e+14  1.057e+07   69529076   <2e-16 ***
## V7.Other.service                2.137e+15  1.938e+07  110275000   <2e-16 ***
## V7.Priv.house.serv                     NA         NA         NA       NA    
## V7.Prof.specialty               3.170e+14  5.589e+06   56730038   <2e-16 ***
## V7.Protective.serv             -5.345e+14  7.672e+06  -69670508   <2e-16 ***
## V7.Sales                        8.977e+14  5.694e+06  157643421   <2e-16 ***
## V7.Tech.support                 7.911e+14  7.616e+06  103877755   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                     -1.433e+15  1.969e+07  -72760359   <2e-16 ***
## V8.Not.in.family                1.555e+15  7.050e+07   22057608   <2e-16 ***
## V8.Other.relative              -4.701e+13  7.015e+07    -670114   <2e-16 ***
## V8.Own.child                           NA         NA         NA       NA    
## V8.Unmarried                   -5.367e+14  8.790e+07   -6106524   <2e-16 ***
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo           8.136e+14  2.250e+07   36160676   <2e-16 ***
## V9.Asian.Pac.Islander          -1.535e+15  9.355e+06 -164127666   <2e-16 ***
## V9.Black                       -1.255e+15  7.568e+06 -165859104   <2e-16 ***
## V9.Other                        1.048e+15  2.296e+07   45649335   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                             NA         NA         NA       NA    
## V10.Male                               NA         NA         NA       NA    
## V11                             1.515e+10  6.103e+01  248215727   <2e-16 ***
## V12                             1.548e+11  1.518e+03  101924136   <2e-16 ***
## V13                            -4.157e+13  8.947e+04 -464636898   <2e-16 ***
## V14..                          -2.127e+14  3.449e+07   -6166348   <2e-16 ***
## V14.Cambodia                    1.804e+15  5.903e+07   30563828   <2e-16 ***
## V14.Canada                     -9.754e+14  3.623e+07  -26924014   <2e-16 ***
## V14.China                       3.139e+14  3.899e+07    8050228   <2e-16 ***
## V14.Columbia                    2.155e+15  7.535e+07   28607405   <2e-16 ***
## V14.Cuba                       -1.569e+15  3.936e+07  -39865728   <2e-16 ***
## V14.Dominican.Republic                 NA         NA         NA       NA    
## V14.Ecuador                     2.000e+14  5.830e+07    3431112   <2e-16 ***
## V14.El.Salvador                 5.352e+14  4.521e+07   11838315   <2e-16 ***
## V14.England                     2.845e+13  3.794e+07     749833   <2e-16 ***
## V14.France                     -3.013e+14  4.125e+07   -7303826   <2e-16 ***
## V14.Germany                    -3.903e+14  3.639e+07  -10723816   <2e-16 ***
## V14.Greece                     -3.609e+15  4.359e+07  -82783695   <2e-16 ***
## V14.Guatemala                          NA         NA         NA       NA    
## V14.Haiti                              NA         NA         NA       NA    
## V14.Holand.Netherlands                 NA         NA         NA       NA    
## V14.Honduras                           NA         NA         NA       NA    
## V14.Hong                        2.628e+14  4.616e+07    5693624   <2e-16 ***
## V14.Hungary                     6.213e+14  5.836e+07   10646011   <2e-16 ***
## V14.India                       3.249e+13  3.665e+07     886349   <2e-16 ***
## V14.Iran                       -3.496e+15  3.831e+07  -91256665   <2e-16 ***
## V14.Ireland                    -2.987e+15  5.141e+07  -58110019   <2e-16 ***
## V14.Italy                       3.034e+13  3.807e+07     797040   <2e-16 ***
## V14.Jamaica                     2.904e+15  7.572e+07   38349803   <2e-16 ***
## V14.Japan                       5.594e+14  3.867e+07   14465190   <2e-16 ***
## V14.Laos                        2.427e+15  7.625e+07   31826490   <2e-16 ***
## V14.Mexico                      1.998e+14  3.917e+07    5100435   <2e-16 ***
## V14.Nicaragua                          NA         NA         NA       NA    
## V14.Outlying.US.Guam.USVI.etc.         NA         NA         NA       NA    
## V14.Peru                       -1.239e+15  7.521e+07  -16478711   <2e-16 ***
## V14.Philippines                -1.491e+15  3.695e+07  -40349667   <2e-16 ***
## V14.Poland                     -2.029e+14  4.348e+07   -4666562   <2e-16 ***
## V14.Portugal                    1.810e+13  5.836e+07     310228   <2e-16 ***
## V14.Puerto.Rico                -5.417e+14  4.800e+07  -11284445   <2e-16 ***
## V14.Scotland                    2.444e+15  7.521e+07   32494881   <2e-16 ***
## V14.South                      -2.945e+14  4.099e+07   -7184583   <2e-16 ***
## V14.Taiwan                      1.731e+15  3.897e+07   44411006   <2e-16 ***
## V14.Thailand                    1.398e+13  5.903e+07     236749   <2e-16 ***
## V14.Trinadad.Tobago                    NA         NA         NA       NA    
## V14.United.States              -8.790e+14  3.373e+07  -26058180   <2e-16 ***
## V14.Vietnam                     3.345e+15  7.597e+07   44023473   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1208.3  on 4916  degrees of freedom
## Residual deviance: 9587.6  on 4833  degrees of freedom
## AIC: 9755.6
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_PC_5.50.5_n1_LrFit0 from training data based on testing data
# NOTE(review): this model was trained only on the mapper node-1 subset
# (tda.m_adult_5.50.5.n1.vec) but is evaluated on the full one-hot test set;
# the near-chance accuracy below (0.24) follows — confirm this evaluation
# design is intended.
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n1_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     38    21
##      >50K    7378  2331
##                                           
##                Accuracy : 0.2425          
##                  95% CI : (0.2341, 0.2512)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0018         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.005124        
##             Specificity : 0.991071        
##          Pos Pred Value : 0.644068        
##          Neg Pred Value : 0.240087        
##              Prevalence : 0.759214        
##          Detection Rate : 0.003890        
##    Detection Prevalence : 0.006040        
##       Balanced Accuracy : 0.498098        
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print immediately above — the same confusion
# matrix is displayed twice; likely leftover and safe to remove.
ad_tda_pc_5.50.5_n1_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K     38    21
##      >50K    7378  2331
##                                           
##                Accuracy : 0.2425          
##                  95% CI : (0.2341, 0.2512)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0018         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.005124        
##             Specificity : 0.991071        
##          Pos Pred Value : 0.644068        
##          Neg Pred Value : 0.240087        
##              Prevalence : 0.759214        
##          Detection Rate : 0.003890        
##    Detection Prevalence : 0.006040        
##       Balanced Accuracy : 0.498098        
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics; element 1 (Accuracy) is retained as the TDA-node-1 LR
# test-set accuracy for the comparisons below.
ad_tda_pc_5.50.5_n1_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.242526618   -0.001839738    0.234053383    0.251153627    0.759213759 
## AccuracyPValue  McnemarPValue 
##    1.000000000    0.000000000
ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_lr_cf0$overall[1]
ad_tda_pc_5.50.5_n1_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##          0.005124056          0.991071429          0.644067797 
##       Neg Pred Value            Precision               Recall 
##          0.240086518          0.644067797          0.005124056 
##                   F1           Prevalence       Detection Rate 
##          0.010167224          0.759213759          0.003890254 
## Detection Prevalence    Balanced Accuracy 
##          0.006040131          0.498097742
# Precision, recall, F1 (byClass elements 5:7) for the TDA-node-1 LR model.
ad_tda_pc_5.50.5_n1_lr_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_lr_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy difference: baseline LR minus TDA-node-1 LR. Negative
# values favor the TDA-assisted model.
diff_tda_pca_5.50.5_lr_n1_3_fold<-(ad_lr_fit_re - ad_tda_pc_5.50.5_n1_lr_fit_re)
diff_tda_pca_5.50.5_lr_n1_3_fold
##     Accuracy
## 1 -0.1214642
## 2 -0.1174894
## 3 -0.1268912
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE bounds of -0.01/0.01 treat accuracy differences within one percentage
# point as practically equivalent (same convention as the RF comparisons above).
bst_tda_pca_5.50.5_lr.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Division by a zero probRight yields Inf, as seen below.
bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_lr.n1_3_fold$probLeft/bst_tda_pca_5.50.5_lr.n1_3_fold$probRight
bst_tda_pca_5.50.5_lr.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n1_3_fold
## $winLeft
## [1] 0.9917
## 
## $winRope
## [1] 0.0083
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_lr.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n1_3_fold
## $left
## [1] 0.9996055
## 
## $rope
## [1] 0.000110433
## 
## $right
## [1] 0.0002840615
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_lr_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold))
#bf_tda_pca_5.50.5_lr.n1_3_fold

#t_test
# Frequentist sanity check on the 3 per-fold differences (df = 2).
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n1_3_fold)
## t = -44.754, df = 2, p-value = 0.0004989
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1336723 -0.1102243
## sample estimates:
##  mean of x 
## -0.1219483
### Test set diff
diff_tda_pca_5.50.5_lr.n1_test<-(lr_cf_ov_acc - ad_tda_pc_5.50.5_n1_lr_cf0_ov_acc)
diff_tda_pca_5.50.5_lr.n1_test
##  Accuracy 
## 0.6090295
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n1_test_odds.left<-bst_tda_pca_5.50.5_lr.n1_test$probLeft/bst_tda_pca_5.50.5_lr.n1_test$probRight
bst_tda_pca_5.50.5_lr.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1582333
## 
## $winRight
## [1] 0.8417667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_lr.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n1_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n1_test))
#bf_tda_pca_5.50.5_lr.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n1_test))

##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Fit a logistic-regression classifier (caret GLM, binomial family) on the
# node-2 TDA mapper vector data, selected by accuracy under the shared
# cross-validation control object.
Adult_TDA_PC_5.50.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data      = tda.m_adult_5.50.5.n2.vec,
  method    = "glm",
  family    = "binomial",
  trControl = fitControl,
  metric    = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.50.5_n2_LrFit0
## Generalized Linear Model 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8138, 8138, 8136 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.7145669  0.4268096
Adult_TDA_PC_5.50.5_n2_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7163225 0.4294322    Fold1
## 2 0.7148476 0.4294944    Fold2
## 3 0.7125307 0.4215021    Fold3
# Per-fold accuracies of the node-2 fit (first resample column, "Accuracy").
ad_tda_pc_5.50.5_n2_lr_fit_re <- Adult_TDA_PC_5.50.5_n2_LrFit0$resample["Accuracy"]

summary(Adult_TDA_PC_5.50.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                    -1.967e+13  2.870e+13 -6.850e-01 0.493065    
## V1                              1.336e-02  2.046e-03  6.528e+00 6.67e-11 ***
## V2..                           -4.308e+12  8.550e+12 -5.040e-01 0.614376    
## V2.Federal.gov                 -4.308e+12  8.551e+12 -5.040e-01 0.614396    
## V2.Local.gov                   -4.308e+12  8.551e+12 -5.040e-01 0.614388    
## V2.Never.worked                        NA         NA         NA       NA    
## V2.Private                     -4.308e+12  8.551e+12 -5.040e-01 0.614408    
## V2.Self.emp.inc                -4.308e+12  8.551e+12 -5.040e-01 0.614386    
## V2.Self.emp.not.inc            -4.308e+12  8.551e+12 -5.040e-01 0.614388    
## V2.State.gov                   -4.308e+12  8.550e+12 -5.040e-01 0.614372    
## V2.Without.pay                 -4.308e+12  8.551e+12 -5.040e-01 0.614382    
## V3                              1.019e-06  2.175e-07  4.687e+00 2.77e-06 ***
## V4.10th                        -6.537e-01  1.826e-01 -3.579e+00 0.000345 ***
## V4.11th                        -7.294e-01  2.044e-01 -3.568e+00 0.000359 ***
## V4.12th                        -2.521e-01  2.832e-01 -8.900e-01 0.373348    
## V4.1st.4th                     -5.050e-01  5.586e-01 -9.040e-01 0.365952    
## V4.5th.6th                     -5.680e-01  3.741e-01 -1.518e+00 0.128967    
## V4.7th.8th                     -1.287e+00  2.030e-01 -6.339e+00 2.31e-10 ***
## V4.9th                         -1.013e+00  2.999e-01 -3.376e+00 0.000734 ***
## V4.Assoc.acdm                   5.309e-03  1.222e-01  4.300e-02 0.965361    
## V4.Assoc.voc                   -4.145e-02  1.021e-01 -4.060e-01 0.684913    
## V4.Bachelors                    5.747e-01  6.873e-02  8.361e+00  < 2e-16 ***
## V4.Doctorate                    1.132e+00  1.918e-01  5.905e+00 3.53e-09 ***
## V4.HS.grad                     -3.084e-01  6.013e-02 -5.129e+00 2.91e-07 ***
## V4.Masters                      9.349e-01  1.021e-01  9.157e+00  < 2e-16 ***
## V4.Preschool                           NA         NA         NA       NA    
## V4.Prof.school                  1.073e+00  1.699e-01  6.317e+00 2.67e-10 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                     2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Married.AF.spouse            2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Married.civ.spouse           2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Married.spouse.absent        4.528e+15  3.454e+13  1.311e+02  < 2e-16 ***
## V6.Never.married                2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Separated                    2.398e+13  3.454e+13  6.940e-01 0.487623    
## V6.Widowed                      2.398e+13  3.454e+13  6.940e-01 0.487623    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                 6.669e-01  1.361e-01  4.899e+00 9.63e-07 ***
## V7.Armed.Forces                -5.528e-01  1.732e+00 -3.190e-01 0.749587    
## V7.Craft.repair                 1.326e-01  9.167e-02  1.447e+00 0.148034    
## V7.Exec.managerial              9.516e-01  9.742e-02  9.768e+00  < 2e-16 ***
## V7.Farming.fishing             -6.079e-01  1.485e-01 -4.092e+00 4.27e-05 ***
## V7.Handlers.cleaners            6.106e-02  1.781e-01  3.430e-01 0.731742    
## V7.Machine.op.inspct            1.666e-01  1.202e-01  1.386e+00 0.165808    
## V7.Other.service                1.136e-02  1.794e-01  6.300e-02 0.949500    
## V7.Priv.house.serv             -2.811e+01  3.621e+05  0.000e+00 0.999938    
## V7.Prof.specialty               6.883e-01  1.073e-01  6.417e+00 1.39e-10 ***
## V7.Protective.serv              6.582e-01  1.474e-01  4.466e+00 7.97e-06 ***
## V7.Sales                        4.691e-01  1.005e-01  4.669e+00 3.03e-06 ***
## V7.Tech.support                 9.348e-01  1.447e-01  6.462e+00 1.03e-10 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                      4.404e-01  1.426e+00  3.090e-01 0.757500    
## V8.Not.in.family                1.221e+00  1.601e+00  7.630e-01 0.445618    
## V8.Other.relative               7.811e-01  1.527e+00  5.120e-01 0.608889    
## V8.Own.child                    1.533e+00  1.591e+00  9.630e-01 0.335406    
## V8.Unmarried                    2.370e+01  3.638e+04  1.000e-03 0.999480    
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo          -4.932e-01  3.265e-01 -1.511e+00 0.130889    
## V9.Asian.Pac.Islander           2.948e-01  2.032e-01  1.450e+00 0.146939    
## V9.Black                        9.881e-01  1.415e-01  6.985e+00 2.84e-12 ***
## V9.Other                        4.578e-01  4.690e-01  9.760e-01 0.329059    
## V9.White                               NA         NA         NA       NA    
## V10.Female                      4.686e+00  1.511e+00  3.101e+00 0.001930 ** 
## V10.Male                               NA         NA         NA       NA    
## V11                             2.798e-04  1.449e-05  1.931e+01  < 2e-16 ***
## V12                             5.656e-04  4.669e-05  1.211e+01  < 2e-16 ***
## V13                             2.005e-02  2.014e-03  9.957e+00  < 2e-16 ***
## V14..                          -3.265e-01  6.869e-01 -4.750e-01 0.634603    
## V14.Cambodia                    1.581e+00  1.115e+00  1.418e+00 0.156332    
## V14.Canada                      3.195e-01  7.366e-01  4.340e-01 0.664429    
## V14.China                      -1.030e+00  8.133e-01 -1.267e+00 0.205153    
## V14.Columbia                   -2.191e+00  1.156e+00 -1.895e+00 0.058028 .  
## V14.Cuba                        5.253e-01  7.717e-01  6.810e-01 0.496104    
## V14.Dominican.Republic         -2.697e+01  2.279e+05  0.000e+00 0.999906    
## V14.Ecuador                    -2.249e-01  1.043e+00 -2.160e-01 0.829290    
## V14.El.Salvador                -3.510e-01  9.132e-01 -3.840e-01 0.700685    
## V14.England                     2.505e-01  7.825e-01  3.200e-01 0.748876    
## V14.France                      5.230e-01  1.010e+00  5.180e-01 0.604632    
## V14.Germany                     4.033e-01  7.343e-01  5.490e-01 0.582832    
## V14.Greece                     -1.370e+00  9.195e-01 -1.490e+00 0.136305    
## V14.Guatemala                  -1.322e+00  1.963e+00 -6.740e-01 0.500601    
## V14.Haiti                       1.385e-01  1.430e+00  9.700e-02 0.922871    
## V14.Holand.Netherlands                 NA         NA         NA       NA    
## V14.Honduras                    4.504e+15  6.711e+07  6.711e+07  < 2e-16 ***
## V14.Hong                       -1.354e-01  1.065e+00 -1.270e-01 0.898818    
## V14.Hungary                    -3.015e-02  1.194e+00 -2.500e-02 0.979847    
## V14.India                      -9.204e-01  7.529e-01 -1.222e+00 0.221573    
## V14.Iran                       -6.423e-02  8.333e-01 -7.700e-02 0.938563    
## V14.Ireland                     1.402e+00  1.331e+00  1.054e+00 0.291937    
## V14.Italy                       1.437e-01  7.628e-01  1.880e-01 0.850565    
## V14.Jamaica                    -2.473e-01  9.478e-01 -2.610e-01 0.794146    
## V14.Japan                      -4.180e-01  8.248e-01 -5.070e-01 0.612321    
## V14.Laos                        2.603e+01  4.776e+05  0.000e+00 0.999957    
## V14.Mexico                      1.362e-01  7.326e-01  1.860e-01 0.852530    
## V14.Nicaragua                  -1.396e+00  1.467e+00 -9.520e-01 0.341303    
## V14.Outlying.US.Guam.USVI.etc. -2.492e+01  2.745e+05  0.000e+00 0.999928    
## V14.Peru                        2.271e-01  1.315e+00  1.730e-01 0.862897    
## V14.Philippines                 5.721e-01  7.622e-01  7.510e-01 0.452892    
## V14.Poland                     -1.261e-01  8.095e-01 -1.560e-01 0.876255    
## V14.Portugal                   -3.059e-01  1.145e+00 -2.670e-01 0.789288    
## V14.Puerto.Rico                -4.806e-01  9.040e-01 -5.320e-01 0.594987    
## V14.Scotland                    7.373e-01  1.418e+00  5.200e-01 0.603005    
## V14.South                      -9.358e-01  8.345e-01 -1.121e+00 0.262128    
## V14.Taiwan                     -5.569e-01  8.562e-01 -6.500e-01 0.515408    
## V14.Thailand                   -6.617e-02  1.450e+00 -4.600e-02 0.963608    
## V14.Trinadad.Tobago             2.728e+01  2.628e+05  0.000e+00 0.999917    
## V14.United.States               9.008e-02  6.663e-01  1.350e-01 0.892453    
## V14.Vietnam                    -1.255e+00  1.123e+00 -1.118e+00 0.263501    
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 16823  on 12205  degrees of freedom
## Residual deviance: 12903  on 12108  degrees of freedom
## AIC: 13099
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_PC_5.50.5_n2_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n2_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1776   541
##      >50K    5640  1811
##                                           
##                Accuracy : 0.3672          
##                  95% CI : (0.3576, 0.3769)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0054          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2395          
##             Specificity : 0.7700          
##          Pos Pred Value : 0.7665          
##          Neg Pred Value : 0.2431          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1818          
##    Detection Prevalence : 0.2372          
##       Balanced Accuracy : 0.5047          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   1776   541
##      >50K    5640  1811
##                                           
##                Accuracy : 0.3672          
##                  95% CI : (0.3576, 0.3769)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.0054          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.2395          
##             Specificity : 0.7700          
##          Pos Pred Value : 0.7665          
##          Neg Pred Value : 0.2431          
##              Prevalence : 0.7592          
##          Detection Rate : 0.1818          
##    Detection Prevalence : 0.2372          
##       Balanced Accuracy : 0.5047          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.367219492    0.005439181    0.357649584    0.376868634    0.759213759 
## AccuracyPValue  McnemarPValue 
##    1.000000000    0.000000000
# Overall test-set accuracy of the node-2 classifier.
ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n2_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n2_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.2394822            0.7699830            0.7665084 
##       Neg Pred Value            Precision               Recall 
##            0.2430546            0.7665084            0.2394822 
##                   F1           Prevalence       Detection Rate 
##            0.3649440            0.7592138            0.1818182 
## Detection Prevalence    Balanced Accuracy 
##            0.2372031            0.5047326
# Keep the Precision / Recall / F1 entries of byClass for later comparison.
ad_tda_pc_5.50.5_n2_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

diff_tda_pca_5.50.5_lr_n2_3_fold<-(ad_lr_fit_re - ad_tda_pc_5.50.5_n2_lr_fit_re)
diff_tda_pca_5.50.5_lr_n2_3_fold
##    Accuracy
## 1 0.1341645
## 2 0.1414115
## 3 0.1319020
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_lr.n2_3_fold$probLeft/bst_tda_pca_5.50.5_lr.n2_3_fold$probRight
bst_tda_pca_5.50.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009266667
## 
## $winRight
## [1] 0.9907333
# Bayesian Correlated Test

bct_tda_pca_5.50.5_lr.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n2_3_fold
## $left
## [1] 0.0002576845
## 
## $rope
## [1] 8.833637e-05
## 
## $right
## [1] 0.999654
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_lr_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold))
#bf_tda_pca_5.50.5_lr.n2_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n2_3_fold)
## t = 47.358, df = 2, p-value = 0.0004456
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1234856 0.1481664
## sample estimates:
## mean of x 
##  0.135826
### Test set diff
diff_tda_pca_5.50.5_lr.n2_test<-(lr_cf_ov_acc - ad_tda_pc_5.50.5_n2_lr_cf0_ov_acc)
diff_tda_pca_5.50.5_lr.n2_test
##  Accuracy 
## 0.4843366
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n2_test_odds.left<-bst_tda_pca_5.50.5_lr.n2_test$probLeft/bst_tda_pca_5.50.5_lr.n2_test$probRight
bst_tda_pca_5.50.5_lr.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1597333
## 
## $winRight
## [1] 0.8402667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_lr.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n2_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n2_test))
#bf_tda_pca_5.50.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n2_test))

##Node3

# Fit a logistic-regression classifier (caret GLM, binomial family) on the
# node-3 TDA mapper vector data, selected by accuracy under the shared
# cross-validation control object.
Adult_TDA_PC_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data      = tda.m_adult_5.50.5.n3.vec,
  method    = "glm",
  family    = "binomial",
  trControl = fitControl,
  metric    = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.50.5_n3_LrFit0
## Generalized Linear Model 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8828, 8826, 8826 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8289284  0.4269351
Adult_TDA_PC_5.50.5_n3_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8349955 0.4419374    Fold1
## 2 0.8253285 0.4083828    Fold2
## 3 0.8264613 0.4304852    Fold3
# FIX: the node-3 resample accuracies must come from the node-3 fit.
# The original read Adult_TDA_PC_5.50.5_n2_LrFit0$resample (copy-paste
# error), which made every downstream "n3" 3-fold comparison silently
# duplicate the n2 results (the n3 fold accuracies are ~0.83, not ~0.71).
ad_tda_pc_5.50.5_n3_lr_fit_re <- Adult_TDA_PC_5.50.5_n3_LrFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -1.180e+13  8.650e+12  -1.364 0.172639    
## V1                              2.544e-03  2.296e-03   1.108 0.267922    
## V2..                            1.180e+13  8.650e+12   1.364 0.172639    
## V2.Federal.gov                  1.180e+13  8.650e+12   1.364 0.172639    
## V2.Local.gov                    1.180e+13  8.650e+12   1.364 0.172639    
## V2.Never.worked                        NA         NA      NA       NA    
## V2.Private                      1.180e+13  8.650e+12   1.364 0.172639    
## V2.Self.emp.inc                 1.180e+13  8.650e+12   1.364 0.172639    
## V2.Self.emp.not.inc             1.180e+13  8.650e+12   1.364 0.172639    
## V2.State.gov                    1.180e+13  8.650e+12   1.364 0.172639    
## V2.Without.pay                  1.180e+13  8.650e+12   1.364 0.172639    
## V3                              1.127e-06  2.361e-07   4.773 1.81e-06 ***
## V4.10th                        -3.082e-01  1.751e-01  -1.760 0.078340 .  
## V4.11th                        -4.031e-01  1.777e-01  -2.269 0.023258 *  
## V4.12th                         1.067e-01  2.582e-01   0.413 0.679442    
## V4.1st.4th                     -9.017e-01  4.887e-01  -1.845 0.065014 .  
## V4.5th.6th                     -9.334e-01  3.349e-01  -2.787 0.005318 ** 
## V4.7th.8th                     -1.341e+00  2.545e-01  -5.269 1.37e-07 ***
## V4.9th                         -9.128e-01  2.554e-01  -3.575 0.000351 ***
## V4.Assoc.acdm                  -5.927e-01  1.449e-01  -4.092 4.28e-05 ***
## V4.Assoc.voc                   -4.086e-01  1.262e-01  -3.236 0.001210 ** 
## V4.Bachelors                   -6.358e-01  8.503e-02  -7.478 7.57e-14 ***
## V4.Doctorate                    1.530e-01  2.176e-01   0.703 0.482035    
## V4.HS.grad                     -3.751e-01  6.898e-02  -5.437 5.42e-08 ***
## V4.Masters                     -4.995e-01  1.200e-01  -4.163 3.14e-05 ***
## V4.Preschool                   -3.157e+01  6.581e+04   0.000 0.999617    
## V4.Prof.school                 -7.407e-02  1.962e-01  -0.378 0.705726    
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -5.751e-01  1.975e-01  -2.912 0.003586 ** 
## V6.Married.AF.spouse            1.453e+00  8.056e-01   1.804 0.071232 .  
## V6.Married.civ.spouse          -2.388e-01  3.406e-01  -0.701 0.483250    
## V6.Married.spouse.absent       -4.412e-01  3.077e-01  -1.434 0.151629    
## V6.Never.married               -3.763e-01  2.065e-01  -1.822 0.068456 .  
## V6.Separated                   -3.602e-01  2.592e-01  -1.389 0.164685    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 1.219e+00  1.374e-01   8.872  < 2e-16 ***
## V7.Armed.Forces                -2.519e+01  2.088e+05   0.000 0.999904    
## V7.Craft.repair                 1.752e-01  1.230e-01   1.425 0.154147    
## V7.Exec.managerial              5.016e-01  1.296e-01   3.871 0.000108 ***
## V7.Farming.fishing             -1.295e+00  2.989e-01  -4.331 1.48e-05 ***
## V7.Handlers.cleaners            4.344e-01  1.723e-01   2.522 0.011685 *  
## V7.Machine.op.inspct            6.695e-01  1.368e-01   4.895 9.84e-07 ***
## V7.Other.service                3.295e-01  1.593e-01   2.069 0.038550 *  
## V7.Priv.house.serv             -2.618e+00  7.482e+00  -0.350 0.726442    
## V7.Prof.specialty               4.234e-01  1.366e-01   3.100 0.001937 ** 
## V7.Protective.serv              2.434e-01  2.090e-01   1.164 0.244225    
## V7.Sales                        7.925e-01  1.283e-01   6.175 6.61e-10 ***
## V7.Tech.support                 1.111e+00  1.648e-01   6.740 1.58e-11 ***
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -8.944e-01  1.333e-01  -6.709 1.97e-11 ***
## V8.Not.in.family                3.686e-02  3.046e-01   0.121 0.903684    
## V8.Other.relative              -4.079e-01  2.831e-01  -1.441 0.149643    
## V8.Own.child                   -4.954e-01  2.973e-01  -1.666 0.095670 .  
## V8.Unmarried                    1.740e-01  3.193e-01   0.545 0.585776    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           3.932e-01  2.667e-01   1.474 0.140444    
## V9.Asian.Pac.Islander           6.353e-01  2.048e-01   3.101 0.001926 ** 
## V9.Black                        9.496e-01  9.520e-02   9.975  < 2e-16 ***
## V9.Other                        4.337e-01  3.313e-01   1.309 0.190520    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      1.575e+00  1.125e-01  13.996  < 2e-16 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             2.870e-04  1.397e-05  20.547  < 2e-16 ***
## V12                             2.752e-04  5.333e-05   5.161 2.46e-07 ***
## V13                             4.299e-03  2.299e-03   1.870 0.061477 .  
## V14..                          -1.261e+00  8.687e-01  -1.451 0.146647    
## V14.Cambodia                   -2.165e-02  1.122e+00  -0.019 0.984608    
## V14.Canada                     -1.081e+00  9.282e-01  -1.165 0.244126    
## V14.China                      -2.365e+00  1.014e+00  -2.333 0.019646 *  
## V14.Columbia                   -2.434e+00  1.350e+00  -1.803 0.071370 .  
## V14.Cuba                       -2.517e-01  9.402e-01  -0.268 0.788946    
## V14.Dominican.Republic         -2.448e+00  1.379e+00  -1.775 0.075862 .  
## V14.Ecuador                    -3.383e-01  1.170e+00  -0.289 0.772471    
## V14.El.Salvador                -1.134e+00  1.040e+00  -1.090 0.275655    
## V14.England                    -5.701e-01  9.346e-01  -0.610 0.541868    
## V14.France                     -8.438e-01  1.125e+00  -0.750 0.453229    
## V14.Germany                    -3.181e-01  9.164e-01  -0.347 0.728528    
## V14.Greece                     -2.236e+00  1.227e+00  -1.823 0.068329 .  
## V14.Guatemala                  -9.200e-01  1.120e+00  -0.821 0.411456    
## V14.Haiti                      -5.254e-01  1.101e+00  -0.477 0.633061    
## V14.Holand.Netherlands                 NA         NA      NA       NA    
## V14.Honduras                    2.462e+01  4.015e+05   0.000 0.999951    
## V14.Hong                       -1.924e+00  1.476e+00  -1.303 0.192448    
## V14.Hungary                    -1.522e+00  1.448e+00  -1.051 0.293204    
## V14.India                      -1.700e+00  9.754e-01  -1.743 0.081350 .  
## V14.Iran                       -1.922e+00  1.135e+00  -1.693 0.090511 .  
## V14.Ireland                    -8.795e-01  1.248e+00  -0.705 0.480868    
## V14.Italy                      -6.023e-01  9.656e-01  -0.624 0.532777    
## V14.Jamaica                    -5.346e-01  9.737e-01  -0.549 0.582947    
## V14.Japan                      -8.683e-01  1.004e+00  -0.865 0.387015    
## V14.Laos                       -2.194e+00  1.435e+00  -1.528 0.126423    
## V14.Mexico                     -1.498e+00  8.854e-01  -1.691 0.090751 .  
## V14.Nicaragua                  -1.320e+00  1.182e+00  -1.117 0.264098    
## V14.Outlying.US.Guam.USVI.etc. -2.500e+01  1.811e+05   0.000 0.999890    
## V14.Peru                       -1.708e+00  1.441e+00  -1.185 0.235994    
## V14.Philippines                -6.762e-01  9.112e-01  -0.742 0.458005    
## V14.Poland                     -9.216e-01  9.850e-01  -0.936 0.349454    
## V14.Portugal                   -1.939e+00  1.388e+00  -1.397 0.162338    
## V14.Puerto.Rico                -1.003e+00  9.656e-01  -1.038 0.299097    
## V14.Scotland                   -7.464e-01  1.355e+00  -0.551 0.581801    
## V14.South                      -1.846e+00  1.019e+00  -1.812 0.070008 .  
## V14.Taiwan                     -1.104e+00  1.055e+00  -1.047 0.295222    
## V14.Thailand                   -1.627e+00  1.500e+00  -1.085 0.277885    
## V14.Trinadad.Tobago            -9.675e-01  1.215e+00  -0.796 0.425941    
## V14.United.States              -9.622e-01  8.492e-01  -1.133 0.257193    
## V14.Vietnam                    -1.750e+00  1.071e+00  -1.634 0.102245    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 14233  on 13239  degrees of freedom
## Residual deviance: 10681  on 13142  degrees of freedom
## AIC: 10877
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_PC_5.50.5_n3_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n3_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5118  1752
##      >50K    2298   600
##                                           
##                Accuracy : 0.5854          
##                  95% CI : (0.5755, 0.5952)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0507         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6901          
##             Specificity : 0.2551          
##          Pos Pred Value : 0.7450          
##          Neg Pred Value : 0.2070          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5240          
##    Detection Prevalence : 0.7033          
##       Balanced Accuracy : 0.4726          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n3_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5118  1752
##      >50K    2298   600
##                                           
##                Accuracy : 0.5854          
##                  95% CI : (0.5755, 0.5952)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.0507         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6901          
##             Specificity : 0.2551          
##          Pos Pred Value : 0.7450          
##          Neg Pred Value : 0.2070          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5240          
##    Detection Prevalence : 0.7033          
##       Balanced Accuracy : 0.4726          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n3_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   5.853808e-01  -5.074639e-02   5.755355e-01   5.951752e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00   1.091610e-17
# Overall test-set accuracy of the node-3 classifier.
ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n3_lr_cf0$overall["Accuracy"]
ad_tda_pc_5.50.5_n3_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6901294            0.2551020            0.7449782 
##       Neg Pred Value            Precision               Recall 
##            0.2070393            0.7449782            0.6901294 
##                   F1           Prevalence       Detection Rate 
##            0.7165057            0.7592138            0.5239558 
## Detection Prevalence    Balanced Accuracy 
##            0.7033170            0.4726157
# Keep the Precision / Recall / F1 entries of byClass for later comparison.
ad_tda_pc_5.50.5_n3_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# NOTE(review): the cached output printed below is byte-identical to the n2
# 3-fold diff (0.1341645 / 0.1414115 / 0.1319020); it was produced by a run
# in which the n3 resample vector was taken from the n2 model object, even
# though the n3 fold accuracies reported above (~0.83) differ from n2's
# (~0.71). The values below — and every "n3" 3-fold test result that follows
# — need to be regenerated from a corrected run.
diff_tda_pca_5.50.5_lr_n3_3_fold<-(ad_lr_fit_re - ad_tda_pc_5.50.5_n3_lr_fit_re)
diff_tda_pca_5.50.5_lr_n3_3_fold
##    Accuracy
## 1 0.1341645
## 2 0.1414115
## 3 0.1319020
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_lr.n3_3_fold$probLeft/bst_tda_pca_5.50.5_lr.n3_3_fold$probRight
bst_tda_pca_5.50.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.009433333
## 
## $winRight
## [1] 0.9905667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_lr.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n3_3_fold
## $left
## [1] 0.0002576845
## 
## $rope
## [1] 8.833637e-05
## 
## $right
## [1] 0.999654
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_lr_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold))
#bf_tda_pca_5.50.5_lr.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n3_3_fold)
## t = 47.358, df = 2, p-value = 0.0004456
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1234856 0.1481664
## sample estimates:
## mean of x 
##  0.135826
### Test set diff
# Single-number accuracy gap on the held-out test set:
# baseline LR accuracy minus the node-3 TDA/PCA-assisted LR accuracy.
diff_tda_pca_5.50.5_lr.n3_test <- lr_cf_ov_acc - ad_tda_pc_5.50.5_n3_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n3_test
##  Accuracy 
## 0.2661753
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test), -0.01, 0.01
)
bst_tda_pca_5.50.5_lr.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n3_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n3_test$probLeft / bst_tda_pca_5.50.5_lr.n3_test$probRight
bst_tda_pca_5.50.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test), -0.01, 0.01
)
bsr_tda_pca_5.50.5_lr.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1579667
## 
## $winRight
## [1] 0.8420333
# Bayesian Correlated Test
# NOTE(review): with a single difference this test returns NA throughout
# (see output below) -- presumably because the variance of one observation
# is undefined; confirm against the helper's definition.

bct_tda_pca_5.50.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n3_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_lr.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled; the original commented line also had unbalanced parentheses)
#plot(rope(diff_tda_pca_5.50.5_lr.n3_test, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n3_test)) #bf_tda_pca_5.50.5_lr.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n3_test))

##Node4

# Fit a binomial GLM (logistic regression) on the node-4 TDA mapper vector
# data. All arguments are named, so this call is equivalent to the original;
# fitControl is defined earlier in the document (3-fold CV per the output).
Adult_TDA_PC_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data      = tda.m_adult_5.50.5.n4.vec,
  family    = 'binomial',
  method    = 'glm',
  trControl = fitControl,
  metric    = 'Accuracy'
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret summary of the fit (resampling scheme and mean CV metrics).
Adult_TDA_PC_5.50.5_n4_LrFit0
## Generalized Linear Model 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11134, 11133, 11133 
## Resampling results:
## 
##   Accuracy   Kappa     
##   0.6692921  0.03635732
Adult_TDA_PC_5.50.5_n4_LrFit0$resample
##    Accuracy      Kappa Resample
## 1 0.8474668 0.04113347    Fold1
## 2 0.8491108 0.05403147    Fold2
## 3 0.3112987 0.01390702    Fold3
# Keep only the per-fold Accuracy column for the difference tests below.
ad_tda_pc_5.50.5_n4_lr_fit_re <- Adult_TDA_PC_5.50.5_n4_LrFit0$resample[1]

# Coefficient table of the final node-4 GLM. The warnings above ("algorithm
# did not converge", "fitted probabilities numerically 0 or 1") together with
# the enormous, identical estimates/SEs on the V2.* dummies and the "9 not
# defined because of singularities" note indicate a rank-deficient fit --
# NOTE(review): looks like (quasi-)separation in the one-hot predictors;
# individual p-values here should not be trusted. Confirm before interpreting.
summary(Adult_TDA_PC_5.50.5_n4_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     5.026e+12  1.131e+13   0.445 0.656642    
## V1                              2.109e-02  3.735e-03   5.647 1.63e-08 ***
## V2..                           -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Federal.gov                 -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Local.gov                   -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Never.worked                -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Private                     -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Self.emp.inc                -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Self.emp.not.inc            -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.State.gov                   -5.026e+12  1.131e+13  -0.445 0.656642    
## V2.Without.pay                 -5.026e+12  1.131e+13  -0.445 0.656642    
## V3                              7.685e-07  3.631e-07   2.116 0.034327 *  
## V4.10th                        -1.539e+00  4.888e-01  -3.148 0.001642 ** 
## V4.11th                        -3.349e-01  3.036e-01  -1.103 0.270076    
## V4.12th                        -8.699e-01  4.911e-01  -1.771 0.076505 .  
## V4.1st.4th                     -2.287e+01  2.637e+04  -0.001 0.999308    
## V4.5th.6th                     -9.827e-01  6.698e-01  -1.467 0.142306    
## V4.7th.8th                     -9.913e-01  4.586e-01  -2.162 0.030647 *  
## V4.9th                         -3.022e-01  4.128e-01  -0.732 0.464162    
## V4.Assoc.acdm                  -1.521e-02  1.939e-01  -0.078 0.937472    
## V4.Assoc.voc                   -2.068e-02  1.890e-01  -0.109 0.912866    
## V4.Bachelors                    3.228e-01  1.220e-01   2.646 0.008142 ** 
## V4.Doctorate                    1.209e+00  3.656e-01   3.307 0.000943 ***
## V4.HS.grad                     -4.158e-01  1.105e-01  -3.763 0.000168 ***
## V4.Masters                      4.200e-01  1.832e-01   2.292 0.021895 *  
## V4.Preschool                   -2.166e+02  1.119e+07   0.000 0.999985    
## V4.Prof.school                  5.598e-01  3.690e-01   1.517 0.129205    
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                     1.284e-02  1.847e-01   0.070 0.944562    
## V6.Married.AF.spouse            3.113e+00  7.265e-01   4.285 1.83e-05 ***
## V6.Married.civ.spouse           1.928e+00  4.073e-01   4.734 2.20e-06 ***
## V6.Married.spouse.absent        4.426e-02  3.222e-01   0.137 0.890729    
## V6.Never.married               -2.178e-01  2.053e-01  -1.061 0.288801    
## V6.Separated                   -3.133e-01  2.699e-01  -1.161 0.245690    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 5.661e-02  2.573e-01   0.220 0.825873    
## V7.Armed.Forces                -2.308e+01  1.302e+05   0.000 0.999859    
## V7.Craft.repair                 1.553e-01  2.726e-01   0.570 0.568930    
## V7.Exec.managerial              2.120e-01  2.630e-01   0.806 0.420201    
## V7.Farming.fishing             -2.666e+00  1.019e+00  -2.615 0.008920 ** 
## V7.Handlers.cleaners           -7.790e-01  4.203e-01  -1.853 0.063838 .  
## V7.Machine.op.inspct           -7.407e-01  3.248e-01  -2.280 0.022592 *  
## V7.Other.service               -5.282e-01  2.812e-01  -1.878 0.060362 .  
## V7.Priv.house.serv             -4.245e+00  2.429e+00  -1.748 0.080495 .  
## V7.Prof.specialty               3.726e-02  2.693e-01   0.138 0.889934    
## V7.Protective.serv              7.033e-01  3.679e-01   1.912 0.055919 .  
## V7.Sales                        4.649e-02  2.667e-01   0.174 0.861630    
## V7.Tech.support                 2.779e-01  3.017e-01   0.921 0.357044    
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                      4.241e+02  3.242e+05   0.001 0.998956    
## V8.Not.in.family               -1.275e-01  3.771e-01  -0.338 0.735328    
## V8.Other.relative              -1.607e+00  3.870e-01  -4.153 3.28e-05 ***
## V8.Own.child                   -1.321e+00  3.610e-01  -3.658 0.000254 ***
## V8.Unmarried                   -1.855e-01  3.874e-01  -0.479 0.632133    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo           2.881e-01  3.548e-01   0.812 0.416757    
## V9.Asian.Pac.Islander           4.901e-01  2.870e-01   1.707 0.087742 .  
## V9.Black                        1.134e-01  1.326e-01   0.855 0.392497    
## V9.Other                        6.152e-02  4.913e-01   0.125 0.900359    
## V9.White                               NA         NA      NA       NA    
## V10.Female                      2.783e-01  1.074e-01   2.590 0.009585 ** 
## V10.Male                               NA         NA      NA       NA    
## V11                             3.618e-04  1.712e-05  21.131  < 2e-16 ***
## V12                             3.504e-04  9.101e-05   3.850 0.000118 ***
## V13                             2.382e-02  3.499e-03   6.809 9.82e-12 ***
## V14..                          -2.180e+00  1.310e+00  -1.665 0.095997 .  
## V14.Cambodia                   -2.357e+01  4.374e+04  -0.001 0.999570    
## V14.Canada                     -2.264e+00  1.406e+00  -1.610 0.107299    
## V14.China                      -2.090e+00  1.447e+00  -1.444 0.148672    
## V14.Columbia                   -2.527e+01  5.220e+04   0.000 0.999614    
## V14.Cuba                       -2.811e+00  1.498e+00  -1.877 0.060565 .  
## V14.Dominican.Republic         -2.739e+00  1.671e+00  -1.639 0.101231    
## V14.Ecuador                    -2.487e+01  8.388e+04   0.000 0.999763    
## V14.El.Salvador                -2.753e+00  1.689e+00  -1.630 0.103163    
## V14.England                    -2.255e+00  1.407e+00  -1.603 0.108979    
## V14.France                     -2.673e+00  1.776e+00  -1.505 0.132281    
## V14.Germany                    -2.223e+00  1.377e+00  -1.614 0.106540    
## V14.Greece                     -1.724e+00  1.752e+00  -0.984 0.325093    
## V14.Guatemala                  -4.375e-01  1.506e+00  -0.291 0.771422    
## V14.Haiti                      -2.640e+00  1.704e+00  -1.549 0.121412    
## V14.Holand.Netherlands         -2.397e+01  3.370e+05   0.000 0.999943    
## V14.Honduras                   -2.508e+01  1.011e+05   0.000 0.999802    
## V14.Hong                       -2.605e+01  8.869e+04   0.000 0.999766    
## V14.Hungary                    -1.846e+00  1.721e+00  -1.073 0.283395    
## V14.India                      -2.515e+00  1.526e+00  -1.648 0.099286 .  
## V14.Iran                       -2.600e+01  8.747e+04   0.000 0.999763    
## V14.Ireland                    -1.923e+00  1.673e+00  -1.150 0.250346    
## V14.Italy                      -1.033e+00  1.412e+00  -0.732 0.464434    
## V14.Jamaica                    -1.851e+00  1.526e+00  -1.212 0.225324    
## V14.Japan                      -1.023e+00  1.404e+00  -0.729 0.466262    
## V14.Laos                       -2.286e+00  1.742e+00  -1.312 0.189437    
## V14.Mexico                     -2.746e+00  1.369e+00  -2.006 0.044837 *  
## V14.Nicaragua                  -1.642e+00  1.723e+00  -0.953 0.340633    
## V14.Outlying.US.Guam.USVI.etc. -2.619e+01  9.667e+04   0.000 0.999784    
## V14.Peru                       -2.527e+01  7.152e+04   0.000 0.999718    
## V14.Philippines                -2.303e+00  1.370e+00  -1.681 0.092735 .  
## V14.Poland                     -2.459e+00  1.651e+00  -1.489 0.136359    
## V14.Portugal                   -1.331e+00  1.537e+00  -0.866 0.386319    
## V14.Puerto.Rico                -2.014e+00  1.384e+00  -1.455 0.145592    
## V14.Scotland                   -2.578e+00  1.821e+00  -1.416 0.156909    
## V14.South                      -3.242e+00  1.538e+00  -2.108 0.035001 *  
## V14.Taiwan                     -1.898e+00  1.527e+00  -1.243 0.213856    
## V14.Thailand                   -2.681e+01  1.114e+05   0.000 0.999808    
## V14.Trinadad.Tobago            -2.591e+01  1.070e+05   0.000 0.999807    
## V14.United.States              -2.274e+00  1.281e+00  -1.776 0.075762 .  
## V14.Vietnam                    -2.216e+00  1.562e+00  -1.419 0.155991    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
## # NOTE(review): residual deviance (130876.7) exceeds the null deviance
## # (7122.1), another symptom of the failed/degenerate fit reported above.
##     Null deviance:   7122.1  on 16699  degrees of freedom
## Residual deviance: 130876.7  on 16600  degrees of freedom
## AIC: 131077
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_PC_5.50.5_n4_LrFit0 from training data based on testing data
# (pred0 is reused across sections of this document; it is overwritten here).
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n4_lr_cf0 <- confusionMatrix(
  data      = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5178   443
##      >50K    2238  1909
##                                           
##                Accuracy : 0.7255          
##                  95% CI : (0.7166, 0.7344)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.4045          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6982          
##             Specificity : 0.8116          
##          Pos Pred Value : 0.9212          
##          Neg Pred Value : 0.4603          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5301          
##    Detection Prevalence : 0.5755          
##       Balanced Accuracy : 0.7549          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the confusion matrix is displayed a second time here,
# exactly repeating the output above (kept to preserve the rendered record).
ad_tda_pc_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   5178   443
##      >50K    2238  1909
##                                           
##                Accuracy : 0.7255          
##                  95% CI : (0.7166, 0.7344)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0.4045          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.6982          
##             Specificity : 0.8116          
##          Pos Pred Value : 0.9212          
##          Neg Pred Value : 0.4603          
##              Prevalence : 0.7592          
##          Detection Rate : 0.5301          
##    Detection Prevalence : 0.5755          
##       Balanced Accuracy : 0.7549          
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics (accuracy, kappa, CIs, p-values).
ad_tda_pc_5.50.5_n4_lr_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.255324e-01   4.044762e-01   7.165653e-01   7.343648e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  4.842908e-263
# Keep the overall accuracy for the difference tests below.
ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n4_lr_cf0[["overall"]][1]
ad_tda_pc_5.50.5_n4_lr_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##            0.6982201            0.8116497            0.9211884 
##       Neg Pred Value            Precision               Recall 
##            0.4603328            0.9211884            0.6982201 
##                   F1           Prevalence       Detection Rate 
##            0.7943545            0.7592138            0.5300983 
## Detection Prevalence    Balanced Accuracy 
##            0.5754505            0.7549349
# Retain precision, recall, and F1 (byClass entries 5-7).
ad_tda_pc_5.50.5_n4_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n4_lr_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy gap: baseline LR minus the node-4 TDA/PCA-assisted LR.
diff_tda_pca_5.50.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_pc_5.50.5_n4_lr_fit_re
diff_tda_pca_5.50.5_lr_n4_3_fold
##      Accuracy
## 1 0.003020208
## 2 0.007148218
## 3 0.533134021
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold), -0.01, 0.01
)
bst_tda_pca_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n4_3_fold$probLeft / bst_tda_pca_5.50.5_lr.n4_3_fold$probRight
bst_tda_pca_5.50.5_lr.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.5825333
## 
## $winRight
## [1] 0.4174667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_lr.n4_3_fold
## $left
## [1] 0.2231778
## 
## $rope
## [1] 0.02107443
## 
## $right
## [1] 0.7557478
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_lr_n4_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
#bf_tda_pca_5.50.5_lr.n4_3_fold

# Frequentist one-sample t-test on the same per-fold differences.
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n4_3_fold)
## t = 1.0289, df = 2, p-value = 0.4117
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.5762549  0.9384565
## sample estimates:
## mean of x 
## 0.1811008
### Test set diff
# Single-number accuracy gap on the held-out test set:
# baseline LR accuracy minus the node-4 TDA/PCA-assisted LR accuracy.
diff_tda_pca_5.50.5_lr.n4_test <- lr_cf_ov_acc - ad_tda_pc_5.50.5_n4_lr_cf0_ov_acc
diff_tda_pca_5.50.5_lr.n4_test
##  Accuracy 
## 0.1260238
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test), -0.01, 0.01
)
bst_tda_pca_5.50.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n4_test_odds.left <-
  bst_tda_pca_5.50.5_lr.n4_test$probLeft / bst_tda_pca_5.50.5_lr.n4_test$probRight
bst_tda_pca_5.50.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test), -0.01, 0.01
)
bsr_tda_pca_5.50.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1605
## 
## $winRight
## [1] 0.8395
# Bayesian Correlated Test
# NOTE(review): NA throughout for a single difference -- presumably the
# variance of one observation is undefined; confirm against the helper.

bct_tda_pca_5.50.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr.n4_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled; the original commented line also had unbalanced parentheses)
#plot(rope(diff_tda_pca_5.50.5_lr.n4_test, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n4_test)) #bf_tda_pca_5.50.5_lr.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n4_test))

##Node5

# Fit a binomial GLM (logistic regression) on the node-5 TDA mapper vector
# data. All arguments are named, so this call is equivalent to the original;
# fitControl is defined earlier in the document (3-fold CV per the output).
Adult_TDA_PC_5.50.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data      = tda.m_adult_5.50.5.n5.vec,
  family    = 'binomial',
  method    = 'glm',
  trControl = fitControl,
  metric    = 'Accuracy'
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret summary of the fit (resampling scheme and mean CV metrics).
Adult_TDA_PC_5.50.5_n5_LrFit0
## Generalized Linear Model 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9602, 9604, 9602 
## Resampling results:
## 
##   Accuracy   Kappa       
##   0.9967371  -0.001392465
Adult_TDA_PC_5.50.5_n5_LrFit0$resample
##    Accuracy         Kappa Resample
## 1 0.9972928 -0.0009620627    Fold1
## 2 0.9975000 -0.0009383797    Fold2
## 3 0.9954186 -0.0022769534    Fold3
# Keep only the per-fold Accuracy column for the difference tests below.
ad_tda_pc_5.50.5_n5_lr_fit_re <- Adult_TDA_PC_5.50.5_n5_LrFit0$resample[1]

# Coefficient table of the final node-5 GLM. The warnings above ("algorithm
# did not converge", "fitted probabilities numerically 0 or 1"), the "11 not
# defined because of singularities" note, and the astronomically large
# estimates and z values below indicate a rank-deficient, degenerate fit --
# NOTE(review): looks like (quasi-)separation in the one-hot predictors;
# none of the p-values here are interpretable. Confirm before use.
summary(Adult_TDA_PC_5.50.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (11 not defined because of singularities)
##                                  Estimate Std. Error    z value Pr(>|z|)    
## (Intercept)                    -2.162e+15  4.232e+07  -51101694   <2e-16 ***
## V1                              2.376e+12  6.085e+04   39044136   <2e-16 ***
## V2..                           -2.941e+15  2.417e+07 -121660805   <2e-16 ***
## V2.Federal.gov                 -1.699e+14  2.417e+07   -7030029   <2e-16 ***
## V2.Local.gov                   -2.387e+15  2.400e+07  -99452186   <2e-16 ***
## V2.Never.worked                -5.359e+14  3.500e+07  -15312477   <2e-16 ***
## V2.Private                     -1.711e+15  2.386e+07  -71711904   <2e-16 ***
## V2.Self.emp.inc                -1.060e+15  2.524e+07  -42001880   <2e-16 ***
## V2.Self.emp.not.inc            -2.324e+15  2.413e+07  -96314772   <2e-16 ***
## V2.State.gov                   -2.249e+15  2.405e+07  -93530750   <2e-16 ***
## V2.Without.pay                         NA         NA         NA       NA    
## V3                              1.936e+08  5.347e+00   36197956   <2e-16 ***
## V4.10th                         8.384e+14  3.033e+06  276394991   <2e-16 ***
## V4.11th                        -2.572e+14  2.616e+06  -98302261   <2e-16 ***
## V4.12th                         1.142e+15  3.992e+06  285991075   <2e-16 ***
## V4.1st.4th                      4.872e+14  7.471e+06   65209586   <2e-16 ***
## V4.5th.6th                      4.300e+14  5.646e+06   76171359   <2e-16 ***
## V4.7th.8th                     -2.477e+14  4.295e+06  -57658375   <2e-16 ***
## V4.9th                          8.353e+14  4.195e+06  199139001   <2e-16 ***
## V4.Assoc.acdm                   1.640e+15  3.285e+06  499144179   <2e-16 ***
## V4.Assoc.voc                    1.068e+13  3.097e+06    3446569   <2e-16 ***
## V4.Bachelors                    1.544e+15  2.227e+06  693399248   <2e-16 ***
## V4.Doctorate                    1.841e+15  2.545e+07   72331489   <2e-16 ***
## V4.HS.grad                     -2.490e+14  1.468e+06 -169661737   <2e-16 ***
## V4.Masters                     -6.746e+14  4.802e+06 -140470411   <2e-16 ***
## V4.Preschool                    5.415e+14  1.147e+07   47195269   <2e-16 ***
## V4.Prof.school                  1.703e+15  1.519e+07  112129727   <2e-16 ***
## V4.Some.college                        NA         NA         NA       NA    
## V5                                     NA         NA         NA       NA    
## V6.Divorced                     4.259e+14  2.874e+06  148178073   <2e-16 ***
## V6.Married.AF.spouse           -1.543e+14  2.463e+07   -6265107   <2e-16 ***
## V6.Married.civ.spouse           1.217e+15  7.595e+06  160187500   <2e-16 ***
## V6.Married.spouse.absent        1.506e+13  4.792e+06    3142987   <2e-16 ***
## V6.Never.married                4.063e+14  3.141e+06  129339448   <2e-16 ***
## V6.Separated                    3.580e+14  3.572e+06  100204898   <2e-16 ***
## V6.Widowed                             NA         NA         NA       NA    
## V7..                                   NA         NA         NA       NA    
## V7.Adm.clerical                -1.017e+15  3.743e+06 -271623876   <2e-16 ***
## V7.Armed.Forces                -1.176e+15  3.396e+07  -34625127   <2e-16 ***
## V7.Craft.repair                 6.660e+14  4.032e+06  165173011   <2e-16 ***
## V7.Exec.managerial             -2.748e+14  4.233e+06  -64926351   <2e-16 ***
## V7.Farming.fishing              7.219e+14  5.332e+06  135380403   <2e-16 ***
## V7.Handlers.cleaners            7.555e+14  4.069e+06  185659303   <2e-16 ***
## V7.Machine.op.inspct            5.763e+13  4.033e+06   14288309   <2e-16 ***
## V7.Other.service                3.967e+13  3.697e+06   10731345   <2e-16 ***
## V7.Priv.house.serv             -7.565e+14  6.687e+06 -113133292   <2e-16 ***
## V7.Prof.specialty              -2.301e+14  4.284e+06  -53698560   <2e-16 ***
## V7.Protective.serv              5.842e+13  6.468e+06    9032610   <2e-16 ***
## V7.Sales                        1.079e+14  3.869e+06   27884603   <2e-16 ***
## V7.Tech.support                -1.244e+15  4.879e+06 -255023701   <2e-16 ***
## V7.Transport.moving                    NA         NA         NA       NA    
## V8.Husband                             NA         NA         NA       NA    
## V8.Not.in.family                1.050e+15  7.815e+06  134312655   <2e-16 ***
## V8.Other.relative               6.155e+14  7.766e+06   79260148   <2e-16 ***
## V8.Own.child                    6.824e+14  7.775e+06   87770147   <2e-16 ***
## V8.Unmarried                    1.163e+15  7.891e+06  147418645   <2e-16 ***
## V8.Wife                                NA         NA         NA       NA    
## V9.Amer.Indian.Eskimo           7.955e+14  5.009e+06  158791158   <2e-16 ***
## V9.Asian.Pac.Islander           5.606e+14  4.575e+06  122534488   <2e-16 ***
## V9.Black                        5.857e+13  1.646e+06   35588559   <2e-16 ***
## V9.Other                       -1.631e+14  5.283e+06  -30878904   <2e-16 ***
## V9.White                               NA         NA         NA       NA    
## V10.Female                      1.778e+15  1.416e+06 1256171611   <2e-16 ***
## V10.Male                               NA         NA         NA       NA    
## V11                             2.519e+10  6.411e+02   39287154   <2e-16 ***
## V12                             6.275e+11  2.151e+03  291765144   <2e-16 ***
## V13                            -4.315e+11  5.322e+04   -8106976   <2e-16 ***
## V14..                          -1.353e+15  3.389e+07  -39929374   <2e-16 ***
## V14.Cambodia                   -1.630e+15  4.235e+07  -38482872   <2e-16 ***
## V14.Canada                     -1.194e+15  3.516e+07  -33962427   <2e-16 ***
## V14.China                      -3.368e+15  3.680e+07  -91523027   <2e-16 ***
## V14.Columbia                   -9.336e+14  3.529e+07  -26452174   <2e-16 ***
## V14.Cuba                       -1.658e+15  3.517e+07  -47133098   <2e-16 ***
## V14.Dominican.Republic         -7.311e+14  3.493e+07  -20929588   <2e-16 ***
## V14.Ecuador                    -8.851e+14  3.764e+07  -23516833   <2e-16 ***
## V14.El.Salvador                -1.400e+15  3.455e+07  -40520006   <2e-16 ***
## V14.England                    -9.049e+14  3.546e+07  -25518749   <2e-16 ***
## V14.France                     -1.156e+15  4.041e+07  -28616005   <2e-16 ***
## V14.Germany                    -1.720e+15  3.467e+07  -49613234   <2e-16 ***
## V14.Greece                     -1.254e+15  4.509e+07  -27813651   <2e-16 ***
## V14.Guatemala                  -1.687e+15  3.503e+07  -48141120   <2e-16 ***
## V14.Haiti                      -1.119e+15  3.557e+07  -31471154   <2e-16 ***
## V14.Holand.Netherlands         -4.936e+15  7.528e+07  -65570303   <2e-16 ***
## V14.Honduras                   -1.554e+15  3.884e+07  -40018740   <2e-16 ***
## V14.Hong                       -1.433e+15  4.054e+07  -35339499   <2e-16 ***
## V14.Hungary                    -1.455e+14  4.509e+07   -3227333   <2e-16 ***
## V14.India                      -1.478e+15  3.687e+07  -40085617   <2e-16 ***
## V14.Iran                       -1.369e+14  4.335e+07   -3157781   <2e-16 ***
## V14.Ireland                    -2.012e+15  3.844e+07  -52329979   <2e-16 ***
## V14.Italy                      -1.160e+15  3.683e+07  -31487069   <2e-16 ***
## V14.Jamaica                    -8.793e+14  3.479e+07  -25275953   <2e-16 ***
## V14.Japan                      -1.230e+15  3.659e+07  -33630842   <2e-16 ***
## V14.Laos                       -1.349e+15  4.002e+07  -33707314   <2e-16 ***
## V14.Mexico                     -1.540e+15  3.383e+07  -45503793   <2e-16 ***
## V14.Nicaragua                  -4.598e+14  3.682e+07  -12488819   <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -1.919e+15  3.923e+07  -48921853   <2e-16 ***
## V14.Peru                       -3.046e+15  3.667e+07  -83061823   <2e-16 ***
## V14.Philippines                -3.528e+15  3.460e+07 -101957990   <2e-16 ***
## V14.Poland                     -3.354e+15  3.630e+07  -92405163   <2e-16 ***
## V14.Portugal                   -1.466e+15  3.760e+07  -38980594   <2e-16 ***
## V14.Puerto.Rico                -7.184e+14  3.454e+07  -20800085   <2e-16 ***
## V14.Scotland                   -3.348e+14  4.508e+07   -7426195   <2e-16 ***
## V14.South                      -1.565e+15  3.570e+07  -43840023   <2e-16 ***
## V14.Taiwan                     -3.826e+15  3.745e+07 -102162557   <2e-16 ***
## V14.Thailand                   -2.314e+15  3.946e+07  -58644892   <2e-16 ***
## V14.Trinadad.Tobago            -1.090e+15  3.926e+07  -27767055   <2e-16 ***
## V14.United.States              -2.224e+15  3.360e+07  -66189140   <2e-16 ***
## V14.Vietnam                    -1.710e+15  3.544e+07  -48267228   <2e-16 ***
## V14.Yugoslavia                         NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
## # NOTE(review): residual deviance (5550.7) exceeds the null deviance
## # (418.0), another symptom of the failed/degenerate fit reported above.
##     Null deviance:  418.0  on 14403  degrees of freedom
## Residual deviance: 5550.7  on 14306  degrees of freedom
## AIC: 5746.7
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_PC_5.50.5_n5_LrFit0 from training data based on testing data
# (pred0 is reused across sections of this document; it is overwritten here).
pred0 <- predict(Adult_TDA_PC_5.50.5_n5_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n5_lr_cf0 <- confusionMatrix(
  data      = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7389  2336
##      >50K      27    16
##                                           
##                Accuracy : 0.7581          
##                  95% CI : (0.7495, 0.7666)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.608           
##                                           
##                   Kappa : 0.0048          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.996359        
##             Specificity : 0.006803        
##          Pos Pred Value : 0.759794        
##          Neg Pred Value : 0.372093        
##              Prevalence : 0.759214        
##          Detection Rate : 0.756450        
##    Detection Prevalence : 0.995598        
##       Balanced Accuracy : 0.501581        
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): the confusion matrix is displayed a second time here,
# exactly repeating the output above (kept to preserve the rendered record).
ad_tda_pc_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7389  2336
##      >50K      27    16
##                                           
##                Accuracy : 0.7581          
##                  95% CI : (0.7495, 0.7666)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.608           
##                                           
##                   Kappa : 0.0048          
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.996359        
##             Specificity : 0.006803        
##          Pos Pred Value : 0.759794        
##          Neg Pred Value : 0.372093        
##              Prevalence : 0.759214        
##          Detection Rate : 0.756450        
##    Detection Prevalence : 0.995598        
##       Balanced Accuracy : 0.501581        
##                                           
##        'Positive' Class :  <=50K          
## 
# Overall statistics (accuracy, kappa, CIs, p-values).
ad_tda_pc_5.50.5_n5_lr_cf0[["overall"]]
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.758087633    0.004756105    0.749467767    0.766553357    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.607970499    0.000000000
# Keep the overall accuracy for the difference tests below.
ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc <- ad_tda_pc_5.50.5_n5_lr_cf0[["overall"]][1]
ad_tda_pc_5.50.5_n5_lr_cf0[["byClass"]]
##          Sensitivity          Specificity       Pos Pred Value 
##          0.996359223          0.006802721          0.759794344 
##       Neg Pred Value            Precision               Recall 
##          0.372093023          0.759794344          0.996359223 
##                   F1           Prevalence       Detection Rate 
##          0.862143399          0.759213759          0.756449631 
## Detection Prevalence    Balanced Accuracy 
##          0.995597871          0.501580972
# Retain precision, recall, and F1 (byClass entries 5-7).
ad_tda_pc_5.50.5_n5_lr_cf0_pre_rec_f1 <- ad_tda_pc_5.50.5_n5_lr_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers

### 3-fold diff

# Per-fold accuracy gap: baseline LR minus the node-5 TDA/PCA-assisted LR
# (negative values mean the TDA-assisted model had higher CV accuracy).
diff_tda_pca_5.50.5_lr_n5_3_fold <- ad_lr_fit_re - ad_tda_pc_5.50.5_n5_lr_fit_re
diff_tda_pca_5.50.5_lr_n5_3_fold
##     Accuracy
## 1 -0.1468058
## 2 -0.1412410
## 3 -0.1509858
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold), -0.01, 0.01
)
bst_tda_pca_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# (probRight is 0 here, so the ratio below is Inf.)

bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left <-
  bst_tda_pca_5.50.5_lr.n5_3_fold$probLeft / bst_tda_pca_5.50.5_lr.n5_3_fold$probRight
bst_tda_pca_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.9906333
## 
## $winRope
## [1] 0.009366667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.50.5_lr.n5_3_fold
## $left
## [1] 0.9997145
## 
## $rope
## [1] 6.831844e-05
## 
## $right
## [1] 0.000217144
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_lr_n5_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
#bf_tda_pca_5.50.5_lr.n5_3_fold

# Frequentist one-sample t-test on the same per-fold differences.
t.test(as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_lr_n5_3_fold)
## t = -51.848, df = 2, p-value = 0.0003718
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1584887 -0.1341997
## sample estimates:
##  mean of x 
## -0.1463442
### Test set diff
# Single-number difference in held-out test accuracy: baseline LR minus TDA-PCA node-5 LR.
diff_tda_pca_5.50.5_lr.n5_test<-(lr_cf_ov_acc - ad_tda_pc_5.50.5_n5_lr_cf0_ov_acc)
diff_tda_pca_5.50.5_lr.n5_test
##   Accuracy 
## 0.09346847
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_lr.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_lr.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_lr.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_lr.n5_test_odds.left<-bst_tda_pca_5.50.5_lr.n5_test$probLeft/bst_tda_pca_5.50.5_lr.n5_test$probRight
bst_tda_pca_5.50.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_lr.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_lr.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_lr.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1603
## 
## $winRight
## [1] 0.8397
# Bayesian Correlated Test

# With a single observation the t-based procedure degenerates (zero df),
# hence the NA results below.
bct_tda_pca_5.50.5_lr.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_lr.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_lr.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_lr.n5_test, c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_lr.n5_test))
#bf_tda_pca_5.50.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_lr.n5_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1


# Fit a logistic regression (binomial GLM) via caret::train on the node-1 data
# from the TDA mapper with a KDE filter (5 intervals, 50% overlap, 5 bins).
# fitControl (defined earlier in the document) supplies 3-fold CV; Accuracy is
# the model-selection metric.
Adult_TDA_KDE_5.50.5_n1_LrFit0 <- train(as.factor(adult_df1) ~ ., 
                 data = tda.m_kde_adult_5.50.5.n1.vec, 
                 family = 'binomial',
                method = 'glm', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit (mean 3-fold CV Accuracy/Kappa).
Adult_TDA_KDE_5.50.5_n1_LrFit0
## Generalized Linear Model 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8926, 8924, 8924 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.859117  0.6173693
Adult_TDA_KDE_5.50.5_n1_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8590002 0.6140018    Fold1
## 2 0.8559265 0.6103198    Fold2
## 3 0.8624244 0.6277862    Fold3
# Keep the per-fold Accuracy column (one-column data frame) for the paired
# Bayesian comparisons below.
ad_tda_kde_5.50.5_n1_lr_fit_re<-Adult_TDA_KDE_5.50.5_n1_LrFit0$resample[1]

# Coefficient table of the final GLM. The output below shows singularities and
# huge standard errors from the non-converged, rank-deficient fit (see the
# glm.fit warnings above).
summary(Adult_TDA_KDE_5.50.5_n1_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (9 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -2.158e+13  1.510e+13  -1.429 0.153005    
## V1                              1.441e-02  2.451e-03   5.878 4.15e-09 ***
## V2..                            2.158e+13  1.510e+13   1.429 0.153005    
## V2.Federal.gov                  2.158e+13  1.510e+13   1.429 0.153005    
## V2.Local.gov                    2.158e+13  1.510e+13   1.429 0.153005    
## V2.Never.worked                 2.158e+13  1.510e+13   1.429 0.153005    
## V2.Private                      2.158e+13  1.510e+13   1.429 0.153005    
## V2.Self.emp.inc                 2.158e+13  1.510e+13   1.429 0.153005    
## V2.Self.emp.not.inc             2.158e+13  1.510e+13   1.429 0.153005    
## V2.State.gov                    2.158e+13  1.510e+13   1.429 0.153005    
## V2.Without.pay                  2.158e+13  1.510e+13   1.429 0.153005    
## V3                              6.893e-07  2.127e-07   3.241 0.001190 ** 
## V4.10th                        -1.169e+00  1.707e-01  -6.848 7.49e-12 ***
## V4.11th                        -1.106e+00  1.708e-01  -6.476 9.41e-11 ***
## V4.12th                        -8.562e-01  3.188e-01  -2.686 0.007236 ** 
## V4.1st.4th                     -1.741e+00  4.843e-01  -3.595 0.000325 ***
## V4.5th.6th                     -1.482e+00  3.069e-01  -4.828 1.38e-06 ***
## V4.7th.8th                     -1.618e+00  1.957e-01  -8.268  < 2e-16 ***
## V4.9th                         -1.325e+00  2.285e-01  -5.797 6.76e-09 ***
## V4.Assoc.acdm                   1.387e-01  1.601e-01   0.866 0.386287    
## V4.Assoc.voc                    1.032e-01  1.585e-01   0.651 0.515109    
## V4.Bachelors                    5.937e-01  9.984e-02   5.947 2.74e-09 ***
## V4.Doctorate                    1.835e+00  1.768e-01  10.379  < 2e-16 ***
## V4.HS.grad                     -4.623e-01  9.987e-02  -4.629 3.67e-06 ***
## V4.Masters                      1.019e+00  1.205e-01   8.462  < 2e-16 ***
## V4.Preschool                   -3.217e+01  6.329e+04  -0.001 0.999594    
## V4.Prof.school                  1.589e+00  1.577e-01  10.082  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -2.860e-01  1.855e-01  -1.542 0.122995    
## V6.Married.AF.spouse            2.264e+00  8.805e-01   2.571 0.010128 *  
## V6.Married.civ.spouse           1.810e+00  4.553e-01   3.976 7.01e-05 ***
## V6.Married.spouse.absent       -5.586e-01  3.806e-01  -1.468 0.142147    
## V6.Never.married               -8.128e-01  1.962e-01  -4.143 3.43e-05 ***
## V6.Separated                   -3.280e-01  2.735e-01  -1.199 0.230452    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                -2.085e-01  1.650e-01  -1.263 0.206432    
## V7.Armed.Forces                -1.677e-01  2.205e+00  -0.076 0.939391    
## V7.Craft.repair                 7.394e-02  1.413e-01   0.523 0.600816    
## V7.Exec.managerial              7.304e-01  1.424e-01   5.130 2.90e-07 ***
## V7.Farming.fishing             -1.036e+00  2.129e-01  -4.866 1.14e-06 ***
## V7.Handlers.cleaners           -8.105e-01  2.508e-01  -3.232 0.001231 ** 
## V7.Machine.op.inspct           -6.159e-01  1.917e-01  -3.213 0.001312 ** 
## V7.Other.service               -1.049e+00  2.076e-01  -5.053 4.35e-07 ***
## V7.Priv.house.serv             -2.332e+01  2.443e+04  -0.001 0.999238    
## V7.Prof.specialty               3.653e-01  1.491e-01   2.450 0.014300 *  
## V7.Protective.serv              1.828e-01  2.277e-01   0.803 0.422010    
## V7.Sales                        1.044e-01  1.498e-01   0.697 0.485734    
## V7.Tech.support                 5.809e-01  2.033e-01   2.857 0.004278 ** 
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.270e+00  1.565e-01  -8.118 4.75e-16 ***
## V8.Not.in.family               -6.745e-01  4.429e-01  -1.523 0.127787    
## V8.Other.relative              -1.488e+00  4.241e-01  -3.509 0.000450 ***
## V8.Own.child                   -1.820e+00  4.592e-01  -3.964 7.36e-05 ***
## V8.Unmarried                   -6.297e-01  4.557e-01  -1.382 0.166968    
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -1.743e-01  3.185e-01  -0.547 0.584288    
## V9.Asian.Pac.Islander           1.859e-01  2.667e-01   0.697 0.485786    
## V9.Black                       -9.233e-02  1.164e-01  -0.793 0.427751    
## V9.Other                        4.619e-01  3.857e-01   1.198 0.231052    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.920e-01  1.133e-01  -7.872 3.49e-15 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.005e-04  1.528e-05  19.668  < 2e-16 ***
## V12                             6.511e-04  5.583e-05  11.662  < 2e-16 ***
## V13                             3.122e-02  2.322e-03  13.445  < 2e-16 ***
## V14..                          -1.280e+00  1.212e+00  -1.056 0.290857    
## V14.Cambodia                    3.755e-01  1.680e+00   0.223 0.823174    
## V14.Canada                     -9.345e-01  1.246e+00  -0.750 0.453395    
## V14.China                      -2.114e+00  1.306e+00  -1.619 0.105527    
## V14.Columbia                   -2.620e+00  1.497e+00  -1.749 0.080212 .  
## V14.Cuba                       -1.492e+00  1.276e+00  -1.169 0.242393    
## V14.Dominican.Republic         -2.395e+01  4.126e+04  -0.001 0.999537    
## V14.Ecuador                    -2.571e+00  1.971e+00  -1.305 0.191940    
## V14.El.Salvador                -1.926e+00  1.451e+00  -1.327 0.184408    
## V14.England                    -1.037e+00  1.265e+00  -0.819 0.412585    
## V14.France                     -9.263e-01  1.358e+00  -0.682 0.495292    
## V14.Germany                    -6.693e-01  1.267e+00  -0.528 0.597199    
## V14.Greece                     -1.402e+00  1.463e+00  -0.958 0.338079    
## V14.Guatemala                  -1.357e+00  1.556e+00  -0.872 0.383148    
## V14.Haiti                      -2.065e+00  1.898e+00  -1.088 0.276500    
## V14.Holand.Netherlands         -2.328e+01  3.075e+05   0.000 0.999940    
## V14.Honduras                   -1.547e+00  3.232e+00  -0.479 0.632153    
## V14.Hong                       -6.725e-01  1.451e+00  -0.463 0.643089    
## V14.Hungary                    -5.827e-01  1.639e+00  -0.355 0.722223    
## V14.India                      -1.689e+00  1.283e+00  -1.317 0.187963    
## V14.Iran                       -1.879e+00  1.447e+00  -1.298 0.194142    
## V14.Ireland                    -5.711e-01  1.894e+00  -0.302 0.762982    
## V14.Italy                       5.082e-01  1.255e+00   0.405 0.685609    
## V14.Jamaica                    -2.496e+00  1.621e+00  -1.540 0.123581    
## V14.Japan                      -1.754e-01  1.370e+00  -0.128 0.898101    
## V14.Laos                       -7.329e-01  1.570e+00  -0.467 0.640724    
## V14.Mexico                     -1.573e+00  1.219e+00  -1.291 0.196677    
## V14.Nicaragua                  -1.716e+00  1.449e+00  -1.184 0.236327    
## V14.Outlying.US.Guam.USVI.etc. -2.564e+01  1.160e+05   0.000 0.999824    
## V14.Peru                       -1.265e+00  1.641e+00  -0.771 0.440959    
## V14.Philippines                -1.387e+00  1.263e+00  -1.098 0.272027    
## V14.Poland                     -7.809e-01  1.367e+00  -0.571 0.567687    
## V14.Portugal                   -2.234e+00  1.721e+00  -1.298 0.194220    
## V14.Puerto.Rico                -4.165e-01  1.305e+00  -0.319 0.749604    
## V14.Scotland                    2.571e-01  1.775e+00   0.145 0.884829    
## V14.South                      -2.715e+00  1.388e+00  -1.956 0.050460 .  
## V14.Taiwan                     -5.682e-01  1.398e+00  -0.406 0.684404    
## V14.Thailand                   -1.812e+00  1.906e+00  -0.951 0.341798    
## V14.Trinadad.Tobago            -1.127e+00  1.723e+00  -0.654 0.512878    
## V14.United.States              -9.105e-01  1.193e+00  -0.763 0.445470    
## V14.Vietnam                    -1.315e+00  1.463e+00  -0.899 0.368657    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 15320.8  on 13386  degrees of freedom
## Residual deviance:  8281.5  on 13287  degrees of freedom
## AIC: 8481.5
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_KDE_5.50.5_n1_LrFit0 from training data based on testing data
# NOTE(review): pred0 is reused as a scratch name across sections of this document.
pred0 <- predict(Adult_TDA_KDE_5.50.5_n1_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n1_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n1_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6969  1004
##      >50K     447  1348
##                                           
##                Accuracy : 0.8515          
##                  95% CI : (0.8442, 0.8585)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.558           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9397          
##             Specificity : 0.5731          
##          Pos Pred Value : 0.8741          
##          Neg Pred Value : 0.7510          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7135          
##    Detection Prevalence : 0.8162          
##       Balanced Accuracy : 0.7564          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print — the same confusion matrix was already printed above.
ad_tda_kde_5.50.5_n1_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6969  1004
##      >50K     447  1348
##                                           
##                Accuracy : 0.8515          
##                  95% CI : (0.8442, 0.8585)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.558           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9397          
##             Specificity : 0.5731          
##          Pos Pred Value : 0.8741          
##          Neg Pred Value : 0.7510          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7135          
##    Detection Prevalence : 0.8162          
##       Balanced Accuracy : 0.7564          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n1_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.514537e-01   5.579693e-01   8.442451e-01   8.584521e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  4.129564e-112   2.967542e-48
# overall[1] = test-set Accuracy for this node's model.
ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_lr_cf0$overall[1]
ad_tda_kde_5.50.5_n1_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9397249            0.5731293            0.8740750 
##       Neg Pred Value            Precision               Recall 
##            0.7509749            0.8740750            0.9397249 
##                   F1           Prevalence       Detection Rate 
##            0.9057119            0.7592138            0.7134521 
## Detection Prevalence    Balanced Accuracy 
##            0.8162367            0.7564271
# byClass[5:7] = Precision, Recall, F1 (see printed ordering above).
ad_tda_kde_5.50.5_n1_lr_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_lr_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
# (header fixed: previously said "RF", but both models compared here are logistic regression)

### 3-fold diff

# Per-fold CV accuracy difference: baseline LR minus TDA-KDE node-1 LR.
diff_tda_kde_5.50.5_lr_n1_3_fold<-(ad_lr_fit_re - ad_tda_kde_5.50.5_n1_lr_fit_re)
diff_tda_kde_5.50.5_lr_n1_3_fold
##        Accuracy
## 1 -0.0085132539
## 2  0.0003325428
## 3 -0.0179916328
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) bounds are [-0.01, 0.01] throughout.
bst_tda_kde_5.50.5_lr.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_lr.n1_3_fold
## $probLeft
## [1] 0.25
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# probRight is 0, so the odds ratio is Inf (division by zero).
bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_lr.n1_3_fold$probLeft/bst_tda_kde_5.50.5_lr.n1_3_fold$probRight
bst_tda_kde_5.50.5_lr.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_lr.n1_3_fold
## $winLeft
## [1] 0.1609
## 
## $winRope
## [1] 0.8391
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Second argument (0.1) is presumably the assumed fold-to-fold correlation —
# TODO confirm against the correlatedBayesianTtest documentation.
bct_tda_kde_5.50.5_lr.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_lr.n1_3_fold
## $left
## [1] 0.4269546
## 
## $rope
## [1] 0.5270443
## 
## $right
## [1] 0.04600108
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_lr_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
#bf_tda_kde_5.50.5_lr.n1_3_fold

#t_test
# Frequentist sanity check on the same per-fold differences.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n1_3_fold)
## t = -1.6489, df = 2, p-value = 0.2409
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03148852  0.01404029
## sample estimates:
##    mean of x 
## -0.008724115
### Test set diff
# Test-set accuracy difference: baseline (non-TDA) LR minus TDA-KDE node-1 LR.
# FIX: the original subtracted svm_cf_ov_acc (the SVM baseline) here, which is
# inconsistent with this LR-vs-LR comparison — the 3-fold comparison above uses
# ad_lr_fit_re and the analogous TDA-PCA section uses lr_cf_ov_acc. Use the LR
# baseline accuracy instead.
# NOTE(review): the '##' output below was produced by the original (SVM-based)
# expression and will change when the document is re-knit.
diff_tda_kde_5.50.5_lr.n1_test<-(lr_cf_ov_acc - ad_tda_kde_5.50.5_n1_lr_cf0_ov_acc)
diff_tda_kde_5.50.5_lr.n1_test
##      Accuracy 
## -0.0004095004
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_lr.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_lr.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_lr.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# 0/0 here, hence the NaN below.
bst_tda_kde_5.50.5_lr.n1_test_odds.left<-bst_tda_kde_5.50.5_lr.n1_test$probLeft/bst_tda_kde_5.50.5_lr.n1_test$probRight
bst_tda_kde_5.50.5_lr.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_lr.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_lr.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# With a single observation the t-based procedure degenerates (zero df),
# hence the NA results below.
bct_tda_kde_5.50.5_lr.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_lr.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_lr.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n1_test, c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n1_test))
#bf_tda_kde_5.50.5_lr.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n1_test))


##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node2

# Same pipeline as node 1: logistic regression (binomial GLM) via caret::train
# on the node-2 data from the TDA mapper with a KDE filter, 3-fold CV via
# fitControl, Accuracy as the selection metric.
Adult_TDA_KDE_5.50.5_n2_LrFit0 <- train(as.factor(adult_df1) ~ ., 
                 data = tda.m_kde_adult_5.50.5.n2.vec, 
                 family = 'binomial',
                method = 'glm', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit (mean 3-fold CV Accuracy/Kappa).
Adult_TDA_KDE_5.50.5_n2_LrFit0
## Generalized Linear Model 
## 
## 12638 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8426, 8425, 8425 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8417463  0.5941028
Adult_TDA_KDE_5.50.5_n2_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8311966 0.5697380    Fold1
## 2 0.8530738 0.6180946    Fold2
## 3 0.8409684 0.5944759    Fold3
# Keep the per-fold Accuracy column (one-column data frame) for the paired
# Bayesian comparisons below.
ad_tda_kde_5.50.5_n2_lr_fit_re<-Adult_TDA_KDE_5.50.5_n2_LrFit0$resample[1]

# Coefficient table of the final GLM (non-converged, rank-deficient fit —
# see the glm.fit warnings above).
summary(Adult_TDA_KDE_5.50.5_n2_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (15 not defined because of singularities)
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     2.377e+12  5.353e+12   0.444 0.656994    
## V1                              4.019e-02  2.998e-03  13.407  < 2e-16 ***
## V2..                           -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Federal.gov                 -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Local.gov                   -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Never.worked                -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Private                     -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Self.emp.inc                -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Self.emp.not.inc            -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.State.gov                   -2.377e+12  5.353e+12  -0.444 0.656994    
## V2.Without.pay                 -2.377e+12  5.353e+12  -0.444 0.656994    
## V3                              1.103e-06  3.002e-07   3.673 0.000240 ***
## V4.10th                        -1.293e+00  6.071e-01  -2.129 0.033222 *  
## V4.11th                        -7.382e-01  2.070e-01  -3.566 0.000363 ***
## V4.12th                        -6.992e-01  3.145e-01  -2.223 0.026183 *  
## V4.1st.4th                             NA         NA      NA       NA    
## V4.5th.6th                             NA         NA      NA       NA    
## V4.7th.8th                             NA         NA      NA       NA    
## V4.9th                                 NA         NA      NA       NA    
## V4.Assoc.acdm                   2.497e-01  1.403e-01   1.779 0.075165 .  
## V4.Assoc.voc                    1.348e-01  1.431e-01   0.942 0.346329    
## V4.Bachelors                    9.199e-01  9.162e-02  10.040  < 2e-16 ***
## V4.Doctorate                           NA         NA      NA       NA    
## V4.HS.grad                     -4.506e-01  8.894e-02  -5.067 4.05e-07 ***
## V4.Masters                      1.352e+00  1.141e-01  11.857  < 2e-16 ***
## V4.Preschool                           NA         NA      NA       NA    
## V4.Prof.school                  2.484e+00  2.158e-01  11.509  < 2e-16 ***
## V4.Some.college                        NA         NA      NA       NA    
## V5                                     NA         NA      NA       NA    
## V6.Divorced                    -4.044e-02  2.329e-01  -0.174 0.862155    
## V6.Married.AF.spouse            3.066e+00  9.421e-01   3.254 0.001137 ** 
## V6.Married.civ.spouse           2.088e+00  5.310e-01   3.932 8.43e-05 ***
## V6.Married.spouse.absent        2.967e-02  3.779e-01   0.079 0.937413    
## V6.Never.married               -5.803e-01  2.445e-01  -2.374 0.017602 *  
## V6.Separated                   -2.802e-01  3.178e-01  -0.882 0.377909    
## V6.Widowed                             NA         NA      NA       NA    
## V7..                                   NA         NA      NA       NA    
## V7.Adm.clerical                 2.207e-02  1.627e-01   0.136 0.892080    
## V7.Armed.Forces                -2.415e+01  3.607e+05   0.000 0.999947    
## V7.Craft.repair                 1.250e-01  1.443e-01   0.867 0.386187    
## V7.Exec.managerial              8.952e-01  1.436e-01   6.236 4.49e-10 ***
## V7.Farming.fishing             -6.954e-01  2.187e-01  -3.180 0.001473 ** 
## V7.Handlers.cleaners           -4.394e-01  2.579e-01  -1.704 0.088433 .  
## V7.Machine.op.inspct           -4.405e-01  1.941e-01  -2.270 0.023203 *  
## V7.Other.service               -1.026e+00  2.122e-01  -4.836 1.32e-06 ***
## V7.Priv.house.serv             -3.433e+00  2.526e+00  -1.359 0.174160    
## V7.Prof.specialty               5.321e-01  1.509e-01   3.527 0.000420 ***
## V7.Protective.serv              7.123e-01  2.190e-01   3.253 0.001142 ** 
## V7.Sales                        2.879e-01  1.495e-01   1.926 0.054125 .  
## V7.Tech.support                 5.911e-01  1.928e-01   3.066 0.002173 ** 
## V7.Transport.moving                    NA         NA      NA       NA    
## V8.Husband                     -1.440e+00  1.490e-01  -9.669  < 2e-16 ***
## V8.Not.in.family               -8.639e-01  4.964e-01  -1.740 0.081799 .  
## V8.Other.relative              -1.702e+00  4.234e-01  -4.021 5.81e-05 ***
## V8.Own.child                   -1.897e+00  5.021e-01  -3.777 0.000159 ***
## V8.Unmarried                   -9.347e-01  5.071e-01  -1.843 0.065260 .  
## V8.Wife                                NA         NA      NA       NA    
## V9.Amer.Indian.Eskimo          -5.421e-03  3.121e-01  -0.017 0.986139    
## V9.Asian.Pac.Islander          -1.005e-01  2.368e-01  -0.425 0.671122    
## V9.Black                        2.333e-03  1.215e-01   0.019 0.984688    
## V9.Other                        1.334e-01  4.064e-01   0.328 0.742657    
## V9.White                               NA         NA      NA       NA    
## V10.Female                     -8.916e-01  1.120e-01  -7.960 1.72e-15 ***
## V10.Male                               NA         NA      NA       NA    
## V11                             3.295e-04  1.625e-05  20.279  < 2e-16 ***
## V12                             7.852e-04  6.169e-05  12.727  < 2e-16 ***
## V13                             2.469e-02  2.664e-03   9.267  < 2e-16 ***
## V14..                          -4.861e-01  9.129e-01  -0.532 0.594403    
## V14.Cambodia                    1.803e+00  1.339e+00   1.346 0.178193    
## V14.Canada                     -5.672e-02  9.926e-01  -0.057 0.954429    
## V14.China                      -6.112e-01  1.074e+00  -0.569 0.569260    
## V14.Columbia                   -2.500e+01  6.303e+04   0.000 0.999684    
## V14.Cuba                        4.934e-02  1.016e+00   0.049 0.961261    
## V14.Dominican.Republic         -2.431e+01  6.589e+04   0.000 0.999706    
## V14.Ecuador                    -1.085e+00  1.642e+00  -0.661 0.508921    
## V14.El.Salvador                -2.494e-01  1.173e+00  -0.213 0.831555    
## V14.England                     5.221e-01  9.807e-01   0.532 0.594445    
## V14.France                      6.892e-01  1.203e+00   0.573 0.566757    
## V14.Germany                     7.369e-01  9.650e-01   0.764 0.445097    
## V14.Greece                     -1.341e+00  1.203e+00  -1.115 0.264954    
## V14.Guatemala                  -7.413e-01  2.216e+00  -0.334 0.738034    
## V14.Haiti                      -5.211e-01  1.222e+00  -0.427 0.669732    
## V14.Holand.Netherlands         -2.230e+01  3.364e+05   0.000 0.999947    
## V14.Honduras                   -2.309e+01  1.684e+05   0.000 0.999891    
## V14.Hong                        1.743e+00  1.363e+00   1.278 0.201085    
## V14.Hungary                     9.291e-01  1.599e+00   0.581 0.561302    
## V14.India                      -3.241e-01  9.738e-01  -0.333 0.739287    
## V14.Iran                       -9.642e-02  1.249e+00  -0.077 0.938485    
## V14.Ireland                    -2.406e+01  1.255e+05   0.000 0.999847    
## V14.Italy                       9.493e-01  1.030e+00   0.921 0.356945    
## V14.Jamaica                    -1.946e+00  1.368e+00  -1.423 0.154862    
## V14.Japan                       4.816e-01  1.096e+00   0.440 0.660258    
## V14.Laos                       -2.472e+01  1.183e+05   0.000 0.999833    
## V14.Mexico                     -6.107e-01  9.836e-01  -0.621 0.534691    
## V14.Nicaragua                  -2.426e+01  8.981e+04   0.000 0.999785    
## V14.Outlying.US.Guam.USVI.etc. -2.428e+01  1.889e+05   0.000 0.999897    
## V14.Peru                       -3.972e-01  1.436e+00  -0.277 0.782121    
## V14.Philippines                 6.763e-01  9.607e-01   0.704 0.481468    
## V14.Poland                      3.535e-01  1.087e+00   0.325 0.745094    
## V14.Portugal                   -2.412e+01  1.266e+05   0.000 0.999848    
## V14.Puerto.Rico                -3.442e-01  1.140e+00  -0.302 0.762585    
## V14.Scotland                    8.816e-03  1.404e+00   0.006 0.994991    
## V14.South                      -1.041e+00  1.069e+00  -0.974 0.330195    
## V14.Taiwan                      1.561e-01  1.085e+00   0.144 0.885539    
## V14.Thailand                   -8.911e-01  1.498e+00  -0.595 0.551914    
## V14.Trinadad.Tobago            -3.699e-01  1.763e+00  -0.210 0.833792    
## V14.United.States               1.391e-01  8.881e-01   0.157 0.875548    
## V14.Vietnam                    -1.970e+00  1.455e+00  -1.354 0.175700    
## V14.Yugoslavia                         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 14967.3  on 12637  degrees of freedom
## Residual deviance:  8297.9  on 12544  degrees of freedom
## AIC: 8485.9
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_LrFit0 from training data based on testing data
# NOTE(review): pred0 is reused as a scratch name across sections of this document.
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6903   985
##      >50K     513  1367
##                                           
##                Accuracy : 0.8466          
##                  95% CI : (0.8393, 0.8537)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5497          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9308          
##             Specificity : 0.5812          
##          Pos Pred Value : 0.8751          
##          Neg Pred Value : 0.7271          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7067          
##    Detection Prevalence : 0.8075          
##       Balanced Accuracy : 0.7560          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print — the same confusion matrix was already printed above.
ad_tda_kde_5.50.5_n2_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6903   985
##      >50K     513  1367
##                                           
##                Accuracy : 0.8466          
##                  95% CI : (0.8393, 0.8537)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5497          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.9308          
##             Specificity : 0.5812          
##          Pos Pred Value : 0.8751          
##          Neg Pred Value : 0.7271          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7067          
##    Detection Prevalence : 0.8075          
##       Balanced Accuracy : 0.7560          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n2_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.466421e-01   5.496963e-01   8.393419e-01   8.537349e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##  2.790746e-100   4.530430e-34
# overall[1] = test-set Accuracy for this node's model.
ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_lr_cf0$overall[1]
ad_tda_kde_5.50.5_n2_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9308252            0.5812075            0.8751268 
##       Neg Pred Value            Precision               Recall 
##            0.7271277            0.8751268            0.9308252 
##                   F1           Prevalence       Detection Rate 
##            0.9021171            0.7592138            0.7066953 
## Detection Prevalence    Balanced Accuracy 
##            0.8075348            0.7560164
# byClass[5:7] = Precision, Recall, F1 (see printed ordering above).
ad_tda_kde_5.50.5_n2_lr_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_lr_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
# (header fixed: previously said "RF", but both models compared here are logistic regression)

### 3-fold diff

# Per-fold CV accuracy difference: baseline LR minus TDA-KDE node-2 LR.
# Positive values => the baseline model has the higher fold accuracy.
diff_tda_kde_5.50.5_lr_n2_3_fold<-(ad_lr_fit_re - ad_tda_kde_5.50.5_n2_lr_fit_re)
diff_tda_kde_5.50.5_lr_n2_3_fold
##      Accuracy
## 1 0.019290389
## 2 0.003185230
## 3 0.003464314
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

# ROPE (region of practical equivalence) bounds are [-0.01, 0.01] throughout.
bst_tda_kde_5.50.5_lr.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_lr.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_lr.n2_3_fold$probLeft/bst_tda_kde_5.50.5_lr.n2_3_fold$probRight
bst_tda_kde_5.50.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_lr.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.6732333
## 
## $winRight
## [1] 0.3267667
# Bayesian Correlated Test

# Second argument (0.1) is presumably the assumed fold-to-fold correlation —
# TODO confirm against the correlatedBayesianTtest documentation.
bct_tda_kde_5.50.5_lr.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_lr.n2_3_fold
## $left
## [1] 0.04681356
## 
## $rope
## [1] 0.5301139
## 
## $right
## [1] 0.4230726
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_lr_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold))
#bf_tda_kde_5.50.5_lr.n2_3_fold

#t_test
# Frequentist sanity check on the same per-fold differences.
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n2_3_fold)
## t = 1.6246, df = 2, p-value = 0.2458
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01425415  0.03154744
## sample estimates:
##   mean of x 
## 0.008646645
### Test set diff
# Held-out test-set accuracy difference: SVM baseline minus node-2 LR
diff_tda_kde_5.50.5_lr.n2_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n2_test
##    Accuracy 
## 0.004402129
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 1
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_lr.n2_test_odds.left <-
  bst_tda_kde_5.50.5_lr.n2_test$probLeft / bst_tda_kde_5.50.5_lr.n2_test$probRight
bst_tda_kde_5.50.5_lr.n2_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test (returns NA here: a single test-set observation)

bct_tda_kde_5.50.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n2_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n2_test)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n2_test)) #bf_tda_pca_5.50.5_lr.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n2_test))

##Node3

# Fit a logistic regression (caret, method = "glm", binomial family) with 3-fold CV
# on the node-3 TDA-KDE mapper subset
Adult_TDA_KDE_5.50.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data      = tda.m_kde_adult_5.50.5.n3.vec,
  family    = "binomial",
  method    = "glm",
  trControl = fitControl,
  metric    = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n3_LrFit0
## Generalized Linear Model 
## 
## 11634 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 7756, 7756, 7756 
## Resampling results:
## 
##   Accuracy   Kappa   
##   0.8318721  0.565766
Adult_TDA_KDE_5.50.5_n3_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8388345 0.5812742    Fold1
## 2 0.8274884 0.5520649    Fold2
## 3 0.8292935 0.5639590    Fold3
# BUG FIX: this previously extracted Adult_TDA_KDE_5.50.5_n2_LrFit0$resample[1]
# (copy-paste from the Node2 section), so every "n3" 3-fold comparison below was
# actually re-testing the node-2 accuracies. Extract from the node-3 fit instead.
ad_tda_kde_5.50.5_n3_lr_fit_re<-Adult_TDA_KDE_5.50.5_n3_LrFit0$resample[1]

# Coefficient table for the node-3 fit; the NA rows are the 18 coefficients the
# output reports as "not defined because of singularities" (collinear one-hot dummies)
summary(Adult_TDA_KDE_5.50.5_n3_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (18 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.210e+13  1.132e+13   -1.068 0.285486    
## V1                              5.716e-02  3.707e-03   15.419  < 2e-16 ***
## V2..                            1.210e+13  1.132e+13    1.068 0.285486    
## V2.Federal.gov                  1.210e+13  1.132e+13    1.068 0.285486    
## V2.Local.gov                    1.210e+13  1.132e+13    1.068 0.285486    
## V2.Never.worked                -4.492e+15  1.132e+13 -396.606  < 2e-16 ***
## V2.Private                      1.210e+13  1.132e+13    1.068 0.285486    
## V2.Self.emp.inc                 1.210e+13  1.132e+13    1.068 0.285486    
## V2.Self.emp.not.inc             1.210e+13  1.132e+13    1.068 0.285486    
## V2.State.gov                    1.210e+13  1.132e+13    1.068 0.285486    
## V2.Without.pay                  1.210e+13  1.132e+13    1.068 0.285486    
## V3                              1.058e-06  4.265e-07    2.480 0.013122 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                        -2.141e+01  1.391e+05    0.000 0.999877    
## V4.12th                        -3.019e-01  3.262e-01   -0.926 0.354645    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                   4.043e-01  1.394e-01    2.901 0.003723 ** 
## V4.Assoc.voc                    3.090e-01  1.336e-01    2.313 0.020710 *  
## V4.Bachelors                    1.133e+00  9.265e-02   12.228  < 2e-16 ***
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -2.567e-01  8.674e-02   -2.959 0.003087 ** 
## V4.Masters                      1.542e+00  1.396e-01   11.048  < 2e-16 ***
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -2.761e-02  3.077e-01   -0.090 0.928514    
## V6.Married.AF.spouse            3.429e+00  1.082e+00    3.170 0.001525 ** 
## V6.Married.civ.spouse           2.257e+00  5.151e-01    4.382 1.18e-05 ***
## V6.Married.spouse.absent        1.001e-01  4.357e-01    0.230 0.818365    
## V6.Never.married               -3.595e-01  3.181e-01   -1.130 0.258384    
## V6.Separated                   -2.111e-01  3.891e-01   -0.543 0.587345    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 2.406e-01  1.626e-01    1.480 0.139002    
## V7.Armed.Forces                -2.416e+01  1.729e+05    0.000 0.999888    
## V7.Craft.repair                 1.624e-01  1.439e-01    1.129 0.258979    
## V7.Exec.managerial              1.016e+00  1.458e-01    6.970 3.17e-12 ***
## V7.Farming.fishing             -8.835e-01  2.453e-01   -3.602 0.000316 ***
## V7.Handlers.cleaners           -4.339e-01  2.492e-01   -1.741 0.081600 .  
## V7.Machine.op.inspct           -5.824e-02  1.761e-01   -0.331 0.740869    
## V7.Other.service               -6.660e-01  2.097e-01   -3.176 0.001495 ** 
## V7.Priv.house.serv             -3.271e+00  2.440e+00   -1.341 0.180045    
## V7.Prof.specialty               6.993e-01  1.540e-01    4.541 5.60e-06 ***
## V7.Protective.serv              1.032e+00  2.144e-01    4.812 1.49e-06 ***
## V7.Sales                        6.145e-01  1.496e-01    4.106 4.02e-05 ***
## V7.Tech.support                 7.322e-01  1.889e-01    3.876 0.000106 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.456e+00  1.601e-01   -9.091  < 2e-16 ***
## V8.Not.in.family               -9.264e-01  4.390e-01   -2.110 0.034857 *  
## V8.Other.relative              -1.820e+00  3.930e-01   -4.629 3.67e-06 ***
## V8.Own.child                   -2.250e+00  4.251e-01   -5.293 1.20e-07 ***
## V8.Unmarried                   -1.128e+00  4.525e-01   -2.493 0.012654 *  
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -4.874e-01  3.861e-01   -1.262 0.206910    
## V9.Asian.Pac.Islander           4.647e-02  2.241e-01    0.207 0.835706    
## V9.Black                       -1.506e-01  1.218e-01   -1.237 0.216218    
## V9.Other                       -6.531e-01  4.634e-01   -1.409 0.158715    
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.174e-01  1.260e-01   -6.489 8.64e-11 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.215e-04  1.673e-05   19.219  < 2e-16 ***
## V12                             6.916e-04  6.360e-05   10.875  < 2e-16 ***
## V13                             2.419e-02  2.892e-03    8.366  < 2e-16 ***
## V14..                          -4.034e-01  1.001e+00   -0.403 0.687010    
## V14.Cambodia                    1.621e+00  1.309e+00    1.239 0.215505    
## V14.Canada                      7.824e-01  1.077e+00    0.726 0.467623    
## V14.China                      -9.027e-01  1.176e+00   -0.768 0.442732    
## V14.Columbia                   -2.441e+01  5.758e+04    0.000 0.999662    
## V14.Cuba                        1.205e+00  1.123e+00    1.073 0.283390    
## V14.Dominican.Republic         -7.977e-01  1.486e+00   -0.537 0.591305    
## V14.Ecuador                    -1.104e+00  1.607e+00   -0.687 0.492121    
## V14.El.Salvador                 8.271e-02  1.280e+00    0.065 0.948471    
## V14.England                     4.644e-01  1.095e+00    0.424 0.671504    
## V14.France                      1.409e+00  1.308e+00    1.078 0.281142    
## V14.Germany                     4.124e-01  1.048e+00    0.394 0.693942    
## V14.Greece                     -1.840e+00  1.297e+00   -1.419 0.155846    
## V14.Guatemala                  -2.407e+01  9.554e+04    0.000 0.999799    
## V14.Haiti                      -6.195e-02  1.258e+00   -0.049 0.960732    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -2.330e+01  1.741e+05    0.000 0.999893    
## V14.Hong                       -2.201e+01  2.388e+05    0.000 0.999926    
## V14.Hungary                    -7.904e-01  1.531e+00   -0.516 0.605591    
## V14.India                      -4.675e-01  1.080e+00   -0.433 0.665103    
## V14.Iran                        1.600e-02  1.127e+00    0.014 0.988670    
## V14.Ireland                     1.424e+00  1.373e+00    1.037 0.299660    
## V14.Italy                      -6.461e-02  1.132e+00   -0.057 0.954474    
## V14.Jamaica                     1.388e-02  1.222e+00    0.011 0.990940    
## V14.Japan                       4.253e-01  1.167e+00    0.364 0.715511    
## V14.Laos                       -2.419e+01  1.483e+05    0.000 0.999870    
## V14.Mexico                     -7.942e-01  1.153e+00   -0.689 0.490896    
## V14.Nicaragua                  -2.163e+01  1.614e+05    0.000 0.999893    
## V14.Outlying.US.Guam.USVI.etc. -2.304e+01  1.849e+05    0.000 0.999901    
## V14.Peru                       -1.213e+00  1.633e+00   -0.743 0.457765    
## V14.Philippines                 1.096e+00  1.052e+00    1.042 0.297548    
## V14.Poland                     -1.086e-01  1.139e+00   -0.095 0.923996    
## V14.Portugal                    5.104e-01  1.356e+00    0.376 0.706689    
## V14.Puerto.Rico                -1.235e+00  1.178e+00   -1.048 0.294454    
## V14.Scotland                   -5.170e-01  1.647e+00   -0.314 0.753623    
## V14.South                      -1.024e+00  1.172e+00   -0.873 0.382456    
## V14.Taiwan                     -4.563e-01  1.221e+00   -0.374 0.708564    
## V14.Thailand                   -8.318e-01  1.654e+00   -0.503 0.614977    
## V14.Trinadad.Tobago            -4.545e-01  1.754e+00   -0.259 0.795488    
## V14.United.States               1.502e-01  9.798e-01    0.153 0.878157    
## V14.Vietnam                    -2.492e+00  1.491e+00   -1.671 0.094637 .  
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 13708.0  on 11633  degrees of freedom
## Residual deviance:  7850.4  on 11543  degrees of freedom
## AIC: 8032.4
## 
## Number of Fisher Scoring iterations: 25
# Score the held-out test set with the node-3 logistic model
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of predictions vs. true labels on the test data
ad_tda_kde_5.50.5_n3_lr_cf0 <- confusionMatrix(data = pred0, reference = as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n3_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6768   915
##      >50K     648  1437
##                                           
##                Accuracy : 0.84            
##                  95% CI : (0.8326, 0.8472)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5447          
##                                           
##  Mcnemar's Test P-Value : 1.717e-11       
##                                           
##             Sensitivity : 0.9126          
##             Specificity : 0.6110          
##          Pos Pred Value : 0.8809          
##          Neg Pred Value : 0.6892          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6929          
##    Detection Prevalence : 0.7865          
##       Balanced Accuracy : 0.7618          
##                                           
##        'Positive' Class :  <=50K          
## 
# (Removed a duplicated statement that re-printed ad_tda_kde_5.50.5_n3_lr_cf0;
# its output was byte-identical to the confusion-matrix printout directly above.)
ad_tda_kde_5.50.5_n3_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.399877e-01   5.447027e-01   8.325650e-01   8.472071e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   3.681244e-85   1.717152e-11
# Node-3 model: keep the overall test-set accuracy for the Bayesian comparisons below
ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n3_lr_cf0[["overall"]][1]
ad_tda_kde_5.50.5_n3_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.9126214            0.6109694            0.8809059 
##       Neg Pred Value            Precision               Recall 
##            0.6892086            0.8809059            0.9126214 
##                   F1           Prevalence       Detection Rate 
##            0.8964832            0.7592138            0.6928747 
## Detection Prevalence    Balanced Accuracy 
##            0.7865479            0.7617954
# Precision, Recall and F1 are byClass elements 5:7
ad_tda_kde_5.50.5_n3_lr_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n3_lr_cf0[["byClass"]][5:7]


###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold accuracy difference: baseline LR minus node-3 TDA-KDE-assisted LR
diff_tda_kde_5.50.5_lr_n3_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n3_lr_fit_re
diff_tda_kde_5.50.5_lr_n3_3_fold
# NOTE(review): the recorded diffs below are identical to the n2 section because the
# original run extracted the n2 resamples into ad_tda_kde_5.50.5_n3_lr_fit_re; rerun
# after the extraction fix to refresh these values.
##      Accuracy
## 1 0.019290389
## 2 0.003185230
## 3 0.003464314
## Bayesian Tests 3-fold diff

# Bayesian Sign Test, ROPE = [-0.01, 0.01]

bst_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n3_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left <-
  bst_tda_kde_5.50.5_lr.n3_3_fold$probLeft / bst_tda_kde_5.50.5_lr.n3_3_fold$probRight
bst_tda_kde_5.50.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n3_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.6711
## 
## $winRight
## [1] 0.3289
# Bayesian Correlated Test

bct_tda_kde_5.50.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n3_3_fold
## $left
## [1] 0.04681356
## 
## $rope
## [1] 0.5301139
## 
## $right
## [1] 0.4230726
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_lr_n3_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold))
#bf_tda_kde_5.50.5_lr.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n3_3_fold)
## t = 1.6246, df = 2, p-value = 0.2458
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01425415  0.03154744
## sample estimates:
##   mean of x 
## 0.008646645
### Test set diff
# Held-out test-set accuracy difference: SVM baseline minus node-3 LR
diff_tda_kde_5.50.5_lr.n3_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n3_test
##   Accuracy 
## 0.01105651
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_lr.n3_test_odds.left <-
  bst_tda_kde_5.50.5_lr.n3_test$probLeft / bst_tda_kde_5.50.5_lr.n3_test$probRight
bst_tda_kde_5.50.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

# BUG FIX: this previously ran on diff_tda_kde_5.50.5_lr.n2_test and overwrote
# bsr_tda_kde_5.50.5_lr.n2_test (copy-paste from the Node2 section); test the
# node-3 diff and store it under the n3 name.
bsr_tda_kde_5.50.5_lr.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_lr.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_lr.n3_test
## (recorded output below came from the original n2-based run; rerun to refresh)
## $winLeft
## [1] 0
## 
## $winRope
## [1] 1
## 
## $winRight
## [1] 0
# Bayesian Correlated Test (returns NA here: a single test-set observation)

bct_tda_kde_5.50.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n3_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n3_test)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n3_test)) #bf_tda_pca_5.50.5_lr.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n3_test))

##Node4

# Fit a logistic regression (caret, method = "glm", binomial family) with 3-fold CV
# on the node-4 TDA-KDE mapper subset
Adult_TDA_KDE_5.50.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data      = tda.m_kde_adult_5.50.5.n4.vec,
  family    = "binomial",
  method    = "glm",
  trControl = fitControl,
  metric    = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.50.5_n4_LrFit0
## Generalized Linear Model 
## 
## 10038 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 6691, 6693, 6692 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.5831072  0.1337313
Adult_TDA_KDE_5.50.5_n4_LrFit0$resample
##    Accuracy       Kappa Resample
## 1 0.6295190 -0.10484924    Fold1
## 2 0.8597907  0.54704909    Fold2
## 3 0.2600120 -0.04100596    Fold3
# NOTE(review): fold accuracies vary widely (0.26-0.86), consistent with the
# glm.fit non-convergence warnings above.
# Per-fold accuracies for the node-4 fit, used in the Bayesian comparisons below
ad_tda_kde_5.50.5_n4_lr_fit_re <- Adult_TDA_KDE_5.50.5_n4_LrFit0[["resample"]][1]

# Coefficient table for the node-4 fit; the NA rows are the 19 coefficients the
# output reports as "not defined because of singularities" (collinear one-hot dummies)
summary(Adult_TDA_KDE_5.50.5_n4_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (19 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.260e+12  1.517e+13   -0.083 0.933819    
## V1                              7.282e-02  4.998e-03   14.569  < 2e-16 ***
## V2..                            7.797e+12  2.097e+13    0.372 0.710086    
## V2.Federal.gov                  7.797e+12  2.096e+13    0.372 0.709945    
## V2.Local.gov                    7.797e+12  2.098e+13    0.372 0.710183    
## V2.Never.worked                -4.496e+15  2.096e+13 -214.464  < 2e-16 ***
## V2.Private                      7.797e+12  2.097e+13    0.372 0.710089    
## V2.Self.emp.inc                 7.797e+12  2.095e+13    0.372 0.709801    
## V2.Self.emp.not.inc             7.797e+12  2.096e+13    0.372 0.709943    
## V2.State.gov                    7.797e+12  2.096e+13    0.372 0.709964    
## V2.Without.pay                 -4.496e+15  2.099e+13 -214.207  < 2e-16 ***
## V3                              1.580e-06  6.297e-07    2.509 0.012114 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                                NA         NA       NA       NA    
## V4.12th                        -2.427e-01  4.229e-01   -0.574 0.566027    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                   4.819e-01  1.934e-01    2.491 0.012730 *  
## V4.Assoc.voc                    4.399e-01  1.318e-01    3.337 0.000846 ***
## V4.Bachelors                    1.112e+00  1.107e-01   10.048  < 2e-16 ***
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -2.108e-01  8.542e-02   -2.468 0.013604 *  
## V4.Masters                             NA         NA       NA       NA    
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -3.636e-01  5.024e-01   -0.724 0.469302    
## V6.Married.AF.spouse            2.500e+00  1.009e+00    2.478 0.013212 *  
## V6.Married.civ.spouse           1.809e+00  6.901e-01    2.621 0.008767 ** 
## V6.Married.spouse.absent       -1.916e-02  6.769e-01   -0.028 0.977419    
## V6.Never.married               -4.782e-01  5.110e-01   -0.936 0.349334    
## V6.Separated                   -6.317e-01  5.843e-01   -1.081 0.279634    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 3.182e-01  1.760e-01    1.808 0.070658 .  
## V7.Armed.Forces                -1.233e+01  4.244e+02   -0.029 0.976824    
## V7.Craft.repair                 2.127e-01  1.511e-01    1.408 0.159250    
## V7.Exec.managerial              9.917e-01  1.593e-01    6.226 4.80e-10 ***
## V7.Farming.fishing             -8.439e-01  2.869e-01   -2.942 0.003263 ** 
## V7.Handlers.cleaners           -5.814e-01  2.561e-01   -2.270 0.023184 *  
## V7.Machine.op.inspct           -7.993e-02  1.823e-01   -0.439 0.660993    
## V7.Other.service               -3.570e-01  2.137e-01   -1.670 0.094864 .  
## V7.Priv.house.serv             -1.786e+01  3.461e+03   -0.005 0.995883    
## V7.Prof.specialty               8.173e-01  1.771e-01    4.614 3.96e-06 ***
## V7.Protective.serv              9.216e-01  2.343e-01    3.933 8.39e-05 ***
## V7.Sales                        6.051e-01  1.617e-01    3.743 0.000182 ***
## V7.Tech.support                 8.322e-01  2.095e-01    3.972 7.13e-05 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.545e+00  2.175e-01   -7.105 1.20e-12 ***
## V8.Not.in.family               -1.300e+00  5.183e-01   -2.508 0.012158 *  
## V8.Other.relative              -2.250e+00  5.009e-01   -4.492 7.06e-06 ***
## V8.Own.child                   -2.508e+00  4.923e-01   -5.093 3.52e-07 ***
## V8.Unmarried                   -1.627e+00  5.374e-01   -3.027 0.002473 ** 
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -1.181e+00  5.091e-01   -2.319 0.020394 *  
## V9.Asian.Pac.Islander           4.669e-01  2.799e-01    1.668 0.095275 .  
## V9.Black                       -3.876e-01  1.459e-01   -2.657 0.007890 ** 
## V9.Other                       -1.463e+00  6.704e-01   -2.183 0.029036 *  
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -8.349e-01  1.911e-01   -4.370 1.24e-05 ***
## V10.Male                               NA         NA       NA       NA    
## V11                             3.266e-04  2.052e-05   15.916  < 2e-16 ***
## V12                             5.521e-04  7.073e-05    7.805 5.93e-15 ***
## V13                             3.011e-02  3.387e-03    8.890  < 2e-16 ***
## V14..                          -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Cambodia                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Canada                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.China                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Columbia                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Cuba                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Dominican.Republic         -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Ecuador                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.El.Salvador                -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.England                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.France                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Germany                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Greece                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Guatemala                  -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Haiti                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Hong                       -4.510e+15  2.170e+13 -207.852  < 2e-16 ***
## V14.Hungary                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.India                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Iran                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Ireland                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Italy                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Jamaica                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Japan                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Laos                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Mexico                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Nicaragua                  -4.510e+15  2.170e+13 -207.852  < 2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -4.510e+15  2.170e+13 -207.852  < 2e-16 ***
## V14.Peru                       -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Philippines                -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Poland                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Portugal                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Puerto.Rico                -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Scotland                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.South                      -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Taiwan                     -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Thailand                   -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Trinadad.Tobago            -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.United.States              -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Vietnam                    -6.537e+12  2.170e+13   -0.301 0.763212    
## V14.Yugoslavia                 -6.537e+12  2.170e+13   -0.301 0.763212    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 10294.3  on 10037  degrees of freedom
## Residual deviance:  5791.2  on  9948  degrees of freedom
## AIC: 5971.2
## 
## Number of Fisher Scoring iterations: 25
# Score the held-out test set with the node-4 logistic model
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of predictions vs. true labels on the test data
ad_tda_kde_5.50.5_n4_lr_cf0 <- confusionMatrix(data = pred0, reference = as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n4_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6608   838
##      >50K     808  1514
##                                           
##                Accuracy : 0.8315          
##                  95% CI : (0.8239, 0.8389)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5371          
##                                           
##  Mcnemar's Test P-Value : 0.4747          
##                                           
##             Sensitivity : 0.8910          
##             Specificity : 0.6437          
##          Pos Pred Value : 0.8875          
##          Neg Pred Value : 0.6520          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6765          
##    Detection Prevalence : 0.7623          
##       Balanced Accuracy : 0.7674          
##                                           
##        'Positive' Class :  <=50K          
## 
# (Removed a duplicated statement that re-printed ad_tda_kde_5.50.5_n4_lr_cf0;
# its output was byte-identical to the confusion-matrix printout directly above.)
ad_tda_kde_5.50.5_n4_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.314906e-01   5.370929e-01   8.239177e-01   8.388652e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   7.115193e-68   4.747341e-01
# Node-4 model: keep the overall test-set accuracy for the Bayesian comparisons below
ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc <- ad_tda_kde_5.50.5_n4_lr_cf0[["overall"]][1]
ad_tda_kde_5.50.5_n4_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8910464            0.6437075            0.8874564 
##       Neg Pred Value            Precision               Recall 
##            0.6520241            0.8874564            0.8910464 
##                   F1           Prevalence       Detection Rate 
##            0.8892477            0.7592138            0.6764947 
## Detection Prevalence    Balanced Accuracy 
##            0.7622850            0.7673769
# Precision, Recall and F1 are byClass elements 5:7
ad_tda_kde_5.50.5_n4_lr_cf0_pre_rec_f1 <- ad_tda_kde_5.50.5_n4_lr_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

# Per-fold accuracy difference: baseline LR minus node-4 TDA-KDE-assisted LR
diff_tda_kde_5.50.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_kde_5.50.5_n4_lr_fit_re
diff_tda_kde_5.50.5_lr_n4_3_fold
##       Accuracy
## 1  0.220967998
## 2 -0.003531683
## 3  0.584420791
## Bayesian Tests 3-fold diff

# Bayesian Sign Test, ROPE = [-0.01, 0.01]

bst_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n4_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left <-
  bst_tda_kde_5.50.5_lr.n4_3_fold$probLeft / bst_tda_kde_5.50.5_lr.n4_3_fold$probRight
bst_tda_kde_5.50.5_lr.n4_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n4_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1454333
## 
## $winRight
## [1] 0.8545667
# Bayesian Correlated Test

bct_tda_kde_5.50.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n4_3_fold
## $left
## [1] 0.1480025
## 
## $rope
## [1] 0.01351726
## 
## $right
## [1] 0.8384802
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_lr_n4_3_fold, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
#bf_tda_kde_5.50.5_lr.n4_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n4_3_fold)
## t = 1.5603, df = 2, p-value = 0.2591
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.4697585  1.0043299
## sample estimates:
## mean of x 
## 0.2672857
### Test set diff
# Held-out test-set accuracy difference: SVM baseline minus node-4 LR
diff_tda_kde_5.50.5_lr.n4_test <- svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_lr_cf0_ov_acc
diff_tda_kde_5.50.5_lr.n4_test
##   Accuracy 
## 0.01955364
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), -0.01, 0.01
)
bst_tda_kde_5.50.5_lr.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_lr.n4_test_odds.left <-
  bst_tda_kde_5.50.5_lr.n4_test$probLeft / bst_tda_kde_5.50.5_lr.n4_test$probRight
bst_tda_kde_5.50.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), -0.01, 0.01
)
bsr_tda_kde_5.50.5_lr.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4607667
## 
## $winRight
## [1] 0.5392333
# Bayesian Correlated Test (returns NA here: a single test-set observation)

bct_tda_kde_5.50.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.50.5_lr.n4_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.50.5_lr.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_lr.n4_test)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n4_test)) #bf_tda_pca_5.50.5_lr.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n4_test))

## Node5 — fit a logistic regression (caret 'glm', binomial family) on the
## TDA-KDE mapper node-5 subset (5 intervals, 50% overlap), 3-fold CV.

Adult_TDA_KDE_5.50.5_n5_LrFit0 <- train(as.factor(adult_df1) ~ ., 
                 data = tda.m_kde_adult_5.50.5.n5.vec, 
                 family = 'binomial',
                method = 'glm', 
                 trControl = fitControl,
                metric='Accuracy')
## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# NOTE(review): the non-convergence / rank-deficiency warnings above indicate
# separation and collinear one-hot columns in this node's subset; the huge
# coefficients (~1e13) in the summary below are symptoms of the same issue.
Adult_TDA_KDE_5.50.5_n5_LrFit0
## Generalized Linear Model 
## 
## 7540 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 5027, 5027, 5026 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8665779  0.3834578
Adult_TDA_KDE_5.50.5_n5_LrFit0$resample
##    Accuracy     Kappa Resample
## 1 0.8627139 0.3699141    Fold1
## 2 0.8678870 0.4033372    Fold2
## 3 0.8691329 0.3771220    Fold3
# Keep the per-fold Accuracy column for the Bayesian 3-fold comparisons.
ad_tda_kde_5.50.5_n5_lr_fit_re<-Adult_TDA_KDE_5.50.5_n5_LrFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n5_LrFit0)
## 
## Call:
## NULL
## 
## Coefficients: (23 not defined because of singularities)
##                                  Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)                    -1.531e+13  1.949e+13   -0.786 0.432144    
## V1                              7.835e-02  6.899e-03   11.357  < 2e-16 ***
## V2..                            1.531e+13  1.949e+13    0.786 0.432144    
## V2.Federal.gov                  1.531e+13  1.949e+13    0.786 0.432144    
## V2.Local.gov                    1.531e+13  1.949e+13    0.786 0.432144    
## V2.Never.worked                -4.488e+15  1.949e+13 -230.295  < 2e-16 ***
## V2.Private                      1.531e+13  1.949e+13    0.786 0.432144    
## V2.Self.emp.inc                 1.531e+13  1.949e+13    0.786 0.432144    
## V2.Self.emp.not.inc             1.531e+13  1.949e+13    0.786 0.432144    
## V2.State.gov                    1.531e+13  1.949e+13    0.786 0.432144    
## V2.Without.pay                  1.531e+13  1.949e+13    0.786 0.432144    
## V3                              2.099e-06  9.540e-07    2.200 0.027778 *  
## V4.10th                                NA         NA       NA       NA    
## V4.11th                                NA         NA       NA       NA    
## V4.12th                                NA         NA       NA       NA    
## V4.1st.4th                             NA         NA       NA       NA    
## V4.5th.6th                             NA         NA       NA       NA    
## V4.7th.8th                             NA         NA       NA       NA    
## V4.9th                                 NA         NA       NA       NA    
## V4.Assoc.acdm                          NA         NA       NA       NA    
## V4.Assoc.voc                    4.025e-01  1.816e-01    2.216 0.026663 *  
## V4.Bachelors                           NA         NA       NA       NA    
## V4.Doctorate                           NA         NA       NA       NA    
## V4.HS.grad                     -3.119e-01  8.559e-02   -3.644 0.000268 ***
## V4.Masters                             NA         NA       NA       NA    
## V4.Preschool                           NA         NA       NA       NA    
## V4.Prof.school                         NA         NA       NA       NA    
## V4.Some.college                        NA         NA       NA       NA    
## V5                                     NA         NA       NA       NA    
## V6.Divorced                    -2.283e-01  1.066e+00   -0.214 0.830365    
## V6.Married.AF.spouse            2.486e+00  1.507e+00    1.650 0.098997 .  
## V6.Married.civ.spouse           2.141e+00  1.174e+00    1.825 0.068066 .  
## V6.Married.spouse.absent       -2.802e-01  1.305e+00   -0.215 0.829953    
## V6.Never.married               -5.760e-01  1.068e+00   -0.539 0.589690    
## V6.Separated                   -4.894e-01  1.139e+00   -0.430 0.667446    
## V6.Widowed                             NA         NA       NA       NA    
## V7..                                   NA         NA       NA       NA    
## V7.Adm.clerical                 3.736e-01  2.086e-01    1.791 0.073227 .  
## V7.Armed.Forces                -2.274e+01  1.525e+05    0.000 0.999881    
## V7.Craft.repair                 2.768e-01  1.663e-01    1.664 0.096174 .  
## V7.Exec.managerial              8.350e-01  1.840e-01    4.537 5.69e-06 ***
## V7.Farming.fishing             -3.688e-01  3.161e-01   -1.167 0.243340    
## V7.Handlers.cleaners           -4.164e-01  2.677e-01   -1.556 0.119827    
## V7.Machine.op.inspct            6.750e-02  1.994e-01    0.339 0.734898    
## V7.Other.service               -3.180e-01  2.459e-01   -1.293 0.196035    
## V7.Priv.house.serv             -2.393e+01  7.364e+04    0.000 0.999741    
## V7.Prof.specialty               1.189e+00  2.334e-01    5.093 3.52e-07 ***
## V7.Protective.serv              8.446e-01  2.740e-01    3.082 0.002056 ** 
## V7.Sales                        4.039e-01  1.859e-01    2.173 0.029765 *  
## V7.Tech.support                 1.113e+00  2.595e-01    4.291 1.78e-05 ***
## V7.Transport.moving                    NA         NA       NA       NA    
## V8.Husband                     -1.278e+00  3.035e-01   -4.212 2.53e-05 ***
## V8.Not.in.family               -7.738e-01  6.013e-01   -1.287 0.198168    
## V8.Other.relative              -1.731e+00  6.119e-01   -2.830 0.004662 ** 
## V8.Own.child                   -1.756e+00  5.661e-01   -3.101 0.001926 ** 
## V8.Unmarried                   -1.727e+00  6.715e-01   -2.573 0.010095 *  
## V8.Wife                                NA         NA       NA       NA    
## V9.Amer.Indian.Eskimo          -1.486e+00  5.768e-01   -2.576 0.009981 ** 
## V9.Asian.Pac.Islander           5.260e-01  4.353e-01    1.208 0.226894    
## V9.Black                       -5.663e-01  1.998e-01   -2.834 0.004591 ** 
## V9.Other                       -2.357e+00  1.110e+00   -2.124 0.033646 *  
## V9.White                               NA         NA       NA       NA    
## V10.Female                     -6.602e-01  2.631e-01   -2.509 0.012094 *  
## V10.Male                               NA         NA       NA       NA    
## V11                             3.697e-04  2.660e-05   13.899  < 2e-16 ***
## V12                             5.408e-04  8.660e-05    6.244 4.26e-10 ***
## V13                             2.854e-02  4.059e-03    7.032 2.04e-12 ***
## V14..                          -1.407e+00  1.435e+00   -0.980 0.326858    
## V14.Cambodia                   -2.607e+01  1.507e+05    0.000 0.999862    
## V14.Canada                     -1.840e+00  1.800e+00   -1.022 0.306601    
## V14.China                      -1.393e+00  1.707e+00   -0.816 0.414689    
## V14.Columbia                   -2.562e+01  8.344e+04    0.000 0.999755    
## V14.Cuba                        3.551e-01  1.558e+00    0.228 0.819681    
## V14.Dominican.Republic         -1.727e+01  1.399e+03   -0.012 0.990148    
## V14.Ecuador                    -1.909e-03  1.691e+00   -0.001 0.999099    
## V14.El.Salvador                -2.776e+00  1.783e+00   -1.557 0.119535    
## V14.England                    -6.973e-01  1.660e+00   -0.420 0.674469    
## V14.France                     -2.309e+01  1.625e+05    0.000 0.999887    
## V14.Germany                    -8.534e-01  1.495e+00   -0.571 0.568216    
## V14.Greece                     -1.636e+00  2.115e+00   -0.774 0.439051    
## V14.Guatemala                  -3.395e-01  1.813e+00   -0.187 0.851448    
## V14.Haiti                      -2.321e+01  1.092e+05    0.000 0.999830    
## V14.Holand.Netherlands                 NA         NA       NA       NA    
## V14.Honduras                   -2.470e+01  2.452e+05    0.000 0.999920    
## V14.Hong                       -2.668e+01  1.398e+05    0.000 0.999848    
## V14.Hungary                    -2.539e+01  3.120e+05    0.000 0.999935    
## V14.India                      -2.833e+00  2.209e+00   -1.282 0.199724    
## V14.Iran                        2.252e-01  1.758e+00    0.128 0.898083    
## V14.Ireland                    -1.216e+00  1.790e+00   -0.679 0.496897    
## V14.Italy                      -2.567e+00  1.866e+00   -1.376 0.168826    
## V14.Jamaica                    -1.321e-01  1.564e+00   -0.084 0.932709    
## V14.Japan                      -2.613e+01  8.646e+04    0.000 0.999759    
## V14.Laos                       -2.666e+01  1.732e+05    0.000 0.999877    
## V14.Mexico                     -1.881e+00  1.498e+00   -1.255 0.209357    
## V14.Nicaragua                  -2.527e+01  1.007e+05    0.000 0.999800    
## V14.Outlying.US.Guam.USVI.etc. -2.399e+01  1.271e+05    0.000 0.999849    
## V14.Peru                       -2.576e+01  1.247e+05    0.000 0.999835    
## V14.Philippines                -7.225e-01  1.581e+00   -0.457 0.647625    
## V14.Poland                     -1.867e+00  1.771e+00   -1.054 0.291869    
## V14.Portugal                   -6.629e-01  1.856e+00   -0.357 0.720983    
## V14.Puerto.Rico                -2.366e+00  1.781e+00   -1.329 0.183994    
## V14.Scotland                   -2.715e+01  2.557e+05    0.000 0.999915    
## V14.South                      -1.428e+00  1.710e+00   -0.835 0.403544    
## V14.Taiwan                     -2.466e+00  1.940e+00   -1.271 0.203759    
## V14.Thailand                   -2.201e+00  2.037e+00   -1.080 0.280071    
## V14.Trinadad.Tobago            -2.743e+01  2.557e+05    0.000 0.999914    
## V14.United.States              -1.026e+00  1.386e+00   -0.740 0.459267    
## V14.Vietnam                    -2.153e+00  1.842e+00   -1.169 0.242491    
## V14.Yugoslavia                         NA         NA       NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6474.2  on 7539  degrees of freedom
## Residual deviance: 4003.6  on 7454  degrees of freedom
## AIC: 4175.6
## 
## Number of Fisher Scoring iterations: 25
# Predict outcome using Adult_TDA_KDE_5.50.5_n5_LrFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n5_LrFit0, newdata= adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n5_lr_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n5_lr_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6514   877
##      >50K     902  1475
##                                           
##                Accuracy : 0.8179          
##                  95% CI : (0.8101, 0.8255)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5037          
##                                           
##  Mcnemar's Test P-Value : 0.5693          
##                                           
##             Sensitivity : 0.8784          
##             Specificity : 0.6271          
##          Pos Pred Value : 0.8813          
##          Neg Pred Value : 0.6205          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6669          
##    Detection Prevalence : 0.7567          
##       Balanced Accuracy : 0.7527          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print statement directly above — harmless,
# but one of the two could be removed on the next revision.
ad_tda_kde_5.50.5_n5_lr_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   6514   877
##      >50K     902  1475
##                                           
##                Accuracy : 0.8179          
##                  95% CI : (0.8101, 0.8255)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5037          
##                                           
##  Mcnemar's Test P-Value : 0.5693          
##                                           
##             Sensitivity : 0.8784          
##             Specificity : 0.6271          
##          Pos Pred Value : 0.8813          
##          Neg Pred Value : 0.6205          
##              Prevalence : 0.7592          
##          Detection Rate : 0.6669          
##    Detection Prevalence : 0.7567          
##       Balanced Accuracy : 0.7527          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n5_lr_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.178747e-01   5.036691e-01   8.100754e-01   8.254840e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.100498e-44   5.693464e-01
# Store the Node-5 model's overall test-set accuracy for later comparisons.
ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc<-ad_tda_kde_5.50.5_n5_lr_cf0$overall[1]
ad_tda_kde_5.50.5_n5_lr_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.8783711            0.6271259            0.8813422 
##       Neg Pred Value            Precision               Recall 
##            0.6205301            0.8813422            0.8783711 
##                   F1           Prevalence       Detection Rate 
##            0.8798541            0.7592138            0.6668714 
## Detection Prevalence    Balanced Accuracy 
##            0.7566544            0.7527485
# Keep Precision, Recall and F1 (byClass elements 5:7) for later summaries.
ad_tda_kde_5.50.5_n5_lr_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n5_lr_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted
###### LR classifiers (Node 5). (Original header said "RF", but the models
###### compared here are the logistic-regression fits.)

### 3-fold diff

# Per-fold accuracy differences: baseline LR resamples minus Node-5 TDA-KDE
# LR resamples (positive = baseline higher).
diff_tda_kde_5.50.5_lr_n5_3_fold<-(ad_lr_fit_re - ad_tda_kde_5.50.5_n5_lr_fit_re)
diff_tda_kde_5.50.5_lr_n5_3_fold
##      Accuracy
## 1 -0.01222692
## 2 -0.01162794
## 3 -0.02470011
## Bayesian Tests 3-fold diff

# Bayesian Sign Test with ROPE = [-0.01, 0.01]

bst_tda_kde_5.50.5_lr.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# (Inf here because probRight is exactly 0.)

bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_lr.n5_3_fold$probLeft/bst_tda_kde_5.50.5_lr.n5_3_fold$probRight
bst_tda_kde_5.50.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test (same ROPE)

bsr_tda_kde_5.50.5_lr.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_lr.n5_3_fold
## $winLeft
## [1] 0.9083
## 
## $winRope
## [1] 0.0917
## 
## $winRight
## [1] 0
# Bayesian Correlated t-Test (0.1 = fold-overlap correlation, as elsewhere)

bct_tda_kde_5.50.5_lr.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_lr.n5_3_fold
## $left
## [1] 0.8321769
## 
## $rope
## [1] 0.1510523
## 
## $right
## [1] 0.01677077
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_lr_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
#bf_tda_kde_5.50.5_lr.n5_3_fold

#t_test
t.test(as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_lr_n5_3_fold)
## t = -3.7983, df = 2, p-value = 0.06285
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.034518896  0.002148919
## sample estimates:
##   mean of x 
## -0.01618499
### Test set diff
# NOTE(review): baseline is svm_cf_ov_acc (SVM test accuracy), not the plain-LR
# test accuracy, although this section compares LR models — confirm intended.
diff_tda_kde_5.50.5_lr.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_lr_cf0_ov_acc)
diff_tda_kde_5.50.5_lr.n5_test
##   Accuracy 
## 0.03316953
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_lr.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_lr.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_lr.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_lr.n5_test_odds.left<-bst_tda_kde_5.50.5_lr.n5_test$probLeft/bst_tda_kde_5.50.5_lr.n5_test$probRight
bst_tda_kde_5.50.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_lr.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_lr.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_lr.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1622
## 
## $winRight
## [1] 0.8378
# Bayesian Correlated Test
# NOTE(review): NA output expected — the correlated t-test degenerates with a
# single observation (n = 1).

bct_tda_kde_5.50.5_lr.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_lr.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_lr.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot (disabled; original commented line had unbalanced parens and was
# missing the ROPE range argument — corrected form kept here for reference)
#plot(rope(diff_tda_kde_5.50.5_lr.n5_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_lr.n5_test)) #bf_tda_pca_5.50.5_lr.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_lr.n5_test))


#naiveBayes — baseline (non-TDA) naive Bayes classifier on the full one-hot
# training set, 3-fold CV via caret ('nb' = klaR::NaiveBayes under the hood).
adultNbFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
# The usekernel=FALSE candidate fails in every fold because several one-hot
# dummy columns have zero variance within a class; caret therefore selects
# the kernel-density variant (usekernel=TRUE) below.
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Cambodia, V14.Holand.Netherlands, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
adultNbFit
## Naive Bayes 
## 
## 22793 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 15196, 15195, 15195 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa     
##   FALSE            NaN         NaN
##    TRUE      0.7648399  0.03456348
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
adultNbFit$resample
##    Accuracy       Kappa Resample
## 1 0.7603001 0.006626826    Fold1
## 2 0.7750724 0.097063606    Fold2
## 3 0.7591471 0.000000000    Fold3
# Keep per-fold Accuracy as the baseline for the TDA-NB Bayesian comparisons.
ad_nb_fit_re<-adultNbFit$resample[1]

summary(adultNbFit)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
#varImp (adultNbFit)



# Predict outcome using model from training data based on testing data
predictions <- predict(adultNbFit, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
Nb_cf<-confusionMatrix(data=predictions, as.factor(adult.one_hot_df4Test$adult_df1))
Nb_cf
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2223
##      >50K       0   129
##                                          
##                Accuracy : 0.7724         
##                  95% CI : (0.764, 0.7807)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.001113       
##                                          
##                   Kappa : 0.081          
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 1.00000        
##             Specificity : 0.05485        
##          Pos Pred Value : 0.76937        
##          Neg Pred Value : 1.00000        
##              Prevalence : 0.75921        
##          Detection Rate : 0.75921        
##    Detection Prevalence : 0.98679        
##       Balanced Accuracy : 0.52742        
##                                          
##        'Positive' Class :  <=50K         
## 
Nb_cf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.772420147    0.080978544    0.763973853    0.780703712    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.001113075    0.000000000
# Store the baseline NB test-set accuracy for later comparisons.
nb_cf_ov_acc<-Nb_cf$overall[1]
Nb_cf$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           1.00000000           0.05484694           0.76937442 
##       Neg Pred Value            Precision               Recall 
##           1.00000000           0.76937442           1.00000000 
##                   F1           Prevalence       Detection Rate 
##           0.86965699           0.75921376           0.75921376 
## Detection Prevalence    Balanced Accuracy 
##           0.98679361           0.52742347
# Keep Precision, Recall and F1 (byClass elements 5:7) for later summaries.
nb_cf_pre_rec_f1<-Nb_cf$byClass[5:7]


##With TDA PCA filter 5 intervals, 50% overlap, 5 bins 
##Node1 — naive Bayes fit on the TDA-PCA mapper node-1 subset, 3-fold CV.

Adult_TDA_PC_5.50.5_n1_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n1.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
# As with the baseline NB fit, the usekernel=FALSE candidate fails in every
# fold (zero-variance one-hot columns within a class); the kernel variant is
# selected.
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n1_NbFit0
## Naive Bayes 
## 
## 4917 samples
##  108 predictor
##    2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 3277, 3278, 3279 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9733579    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
# NOTE(review): Kappa = 0 in every fold — within this node the model appears
# to predict a single class (consistent with the degenerate confusion matrix
# below, where every test row is predicted ' >50K'). The high CV accuracy
# likely just reflects class imbalance inside this node — verify.
Adult_TDA_PC_5.50.5_n1_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9731707     0    Fold1
## 2 0.9731544     0    Fold2
## 3 0.9737485     0    Fold3
# Keep per-fold Accuracy for the Bayesian 3-fold comparisons.
ad_tda_pc_5.50.5_n1_nb_fit_re<-Adult_TDA_PC_5.50.5_n1_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n1_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.50.5_n1_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n1_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate of the print statement directly above — harmless,
# but one of the two could be removed on the next revision.
ad_tda_pc_5.50.5_n1_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K      0     0
##      >50K    7416  2352
##                                           
##                Accuracy : 0.2408          
##                  95% CI : (0.2323, 0.2494)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.2408          
##              Prevalence : 0.7592          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n1_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.2407862      0.0000000      0.2323343      0.2493929      0.7592138 
## AccuracyPValue  McnemarPValue 
##      1.0000000      0.0000000
# Store the Node-1 NB test-set accuracy for later comparisons.
ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n1_nb_cf0$overall[1]
# Per-class performance statistics from the confusion matrix.
# Fix: the original read `$byClas1`, a typo — `$` found no matching element
# and printed NULL (as the original knitted output recorded). The parallel
# sections use `$byClass`; corrected here. On re-knit this will print the
# named byClass vector instead of NULL.
ad_tda_pc_5.50.5_n1_nb_cf0$byClass
# Keep Precision, Recall and F1 (byClass elements 5:7) for later summaries.
ad_tda_pc_5.50.5_n1_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n1_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers

### 3-fold diff

diff_tda_pca_5.50.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n1_nb_fit_re)
diff_tda_pca_5.50.5_nb_n1_3_fold
##     Accuracy
## 1 -0.2128706
## 2 -0.1980820
## 3 -0.2146013
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n1_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n1_3_fold$probRight
bst_tda_pca_5.50.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0.9917333
## 
## $winRope
## [1] 0.008266667
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n1_3_fold
## $left
## [1] 0.9995358
## 
## $rope
## [1] 8.098509e-05
## 
## $right
## [1] 0.000383183
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
#bf_tda_pca_5.50.5_nb.n1_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n1_3_fold)
## t = -39.779, df = 2, p-value = 0.0006314
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2310719 -0.1859641
## sample estimates:
## mean of x 
## -0.208518
### Test set diff
diff_tda_pca_5.50.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n1_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n1_test
##  Accuracy 
## 0.5316339
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n1_test_odds.left<-bst_tda_pca_5.50.5_nb.n1_test$probLeft/bst_tda_pca_5.50.5_nb.n1_test$probRight
bst_tda_pca_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1610333
## 
## $winRight
## [1] 0.8389667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n1_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n1_test)) #bf_tda_pca_5.50.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n1_test))

##Node2

# Train a caret naive-Bayes model (method = 'nb'; the warnings below show it
# dispatches to NaiveBayes.default) on the TDA mapper node-2 vectorized data,
# selecting by Accuracy under the resampling scheme in `fitControl`
# (3-fold CV per the printed summary). NOTE(review): the usekernel=FALSE
# candidate fails on zero-variance one-hot columns, so only the kernel
# estimate is tuned — presumably intentional; confirm.
Adult_TDA_PC_5.50.5_n2_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n2.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n2_NbFit0
## Naive Bayes 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8138, 8137, 8137 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa    
##   FALSE            NaN        NaN
##    TRUE      0.5670992  0.1900125
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n2_NbFit0$resample
##    Accuracy     Kappa Resample
## 1 0.5796460 0.2117894    Fold1
## 2 0.5785205 0.2093729    Fold2
## 3 0.5431310 0.1488753    Fold3
ad_tda_pc_5.50.5_n2_nb_fit_re<-Adult_TDA_PC_5.50.5_n2_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n2_NbFit0 from training data based on testing data
# (pred0 is a scratch name reused by each node section)
pred0 <- predict(Adult_TDA_PC_5.50.5_n2_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
# (predicted classes vs. the true labels in adult.one_hot_df4Test$adult_df1)
ad_tda_pc_5.50.5_n2_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3584  1595
##      >50K    3832   757
##                                           
##                Accuracy : 0.4444          
##                  95% CI : (0.4345, 0.4543)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1471         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.4833          
##             Specificity : 0.3219          
##          Pos Pred Value : 0.6920          
##          Neg Pred Value : 0.1650          
##              Prevalence : 0.7592          
##          Detection Rate : 0.3669          
##    Detection Prevalence : 0.5302          
##       Balanced Accuracy : 0.4026          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3584  1595
##      >50K    3832   757
##                                           
##                Accuracy : 0.4444          
##                  95% CI : (0.4345, 0.4543)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1471         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.4833          
##             Specificity : 0.3219          
##          Pos Pred Value : 0.6920          
##          Neg Pred Value : 0.1650          
##              Prevalence : 0.7592          
##          Detection Rate : 0.3669          
##    Detection Prevalence : 0.5302          
##       Balanced Accuracy : 0.4026          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n2_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.444103e-01  -1.470993e-01   4.345229e-01   4.543309e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  2.341485e-202
ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n2_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n2_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.4832794            0.3218537            0.6920255 
##       Neg Pred Value            Precision               Recall 
##            0.1649597            0.6920255            0.4832794 
##                   F1           Prevalence       Detection Rate 
##            0.5691147            0.7592138            0.3669124 
## Detection Prevalence    Balanced Accuracy 
##            0.5302007            0.4025666
ad_tda_pc_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n2_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_pca_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n2_nb_fit_re)
diff_tda_pca_5.50.5_nb_n2_3_fold
##    Accuracy
## 1 0.1806541
## 2 0.1965519
## 3 0.2160162
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n2_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n2_3_fold$probRight
bst_tda_pca_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0089
## 
## $winRight
## [1] 0.9911
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_3_fold
## $left
## [1] 0.001607426
## 
## $rope
## [1] 0.000358595
## 
## $right
## [1] 0.998034
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
#bf_tda_pca_5.50.5_nb.n2_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n2_3_fold)
## t = 19.338, df = 2, p-value = 0.002663
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.1537442 0.2417372
## sample estimates:
## mean of x 
## 0.1977407
### Test set diff
diff_tda_pca_5.50.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n2_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n2_test
##  Accuracy 
## 0.3280098
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n2_test_odds.left<-bst_tda_pca_5.50.5_nb.n2_test$probLeft/bst_tda_pca_5.50.5_nb.n2_test$probRight
bst_tda_pca_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1575
## 
## $winRight
## [1] 0.8425
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n2_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n2_test)) #bf_tda_pca_5.50.5_nb.n2_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n2_test))

##Node3

# Train a caret naive-Bayes model (method = 'nb') on the TDA mapper node-3
# vectorized data, selecting by Accuracy under `fitControl` (3-fold CV per
# the printed summary). As in the other node sections, the usekernel=FALSE
# candidate fails on zero-variance one-hot columns (see warnings below),
# leaving the kernel-density estimate as the fitted model.
Adult_TDA_PC_5.50.5_n3_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n3.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Dominican.Republic, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Portugal
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n3_NbFit0
## Naive Bayes 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8827, 8826, 8827 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa      
##   FALSE            NaN          NaN
##    TRUE      0.7723564  0.006076996
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n3_NbFit0$resample
##    Accuracy      Kappa Resample
## 1 0.7715840 0.00000000    Fold1
## 2 0.7741278 0.01823099    Fold2
## 3 0.7713574 0.00000000    Fold3
ad_tda_pc_5.50.5_n3_nb_fit_re<-Adult_TDA_PC_5.50.5_n3_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n3_NbFit0 from training data based on testing data
# (pred0 is a scratch name reused by each node section)
pred0 <- predict(Adult_TDA_PC_5.50.5_n3_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
# (predicted classes vs. the true labels in adult.one_hot_df4Test$adult_df1)
ad_tda_pc_5.50.5_n3_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2348
##      >50K       0     4
##                                          
##                Accuracy : 0.7596         
##                  95% CI : (0.751, 0.7681)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.4678         
##                                          
##                   Kappa : 0.0026         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 1.000000       
##             Specificity : 0.001701       
##          Pos Pred Value : 0.759525       
##          Neg Pred Value : 1.000000       
##              Prevalence : 0.759214       
##          Detection Rate : 0.759214       
##    Detection Prevalence : 0.999590       
##       Balanced Accuracy : 0.500850       
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_pc_5.50.5_n3_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2348
##      >50K       0     4
##                                          
##                Accuracy : 0.7596         
##                  95% CI : (0.751, 0.7681)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.4678         
##                                          
##                   Kappa : 0.0026         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 1.000000       
##             Specificity : 0.001701       
##          Pos Pred Value : 0.759525       
##          Neg Pred Value : 1.000000       
##              Prevalence : 0.759214       
##          Detection Rate : 0.759214       
##    Detection Prevalence : 0.999590       
##       Balanced Accuracy : 0.500850       
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_pc_5.50.5_n3_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.759623260    0.002580085    0.751021356    0.768070101    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.467802791    0.000000000
ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n3_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n3_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           1.00000000           0.00170068           0.75952478 
##       Neg Pred Value            Precision               Recall 
##           1.00000000           0.75952478           1.00000000 
##                   F1           Prevalence       Detection Rate 
##           0.86332945           0.75921376           0.75921376 
## Detection Prevalence    Balanced Accuracy 
##           0.99959050           0.50085034
ad_tda_pc_5.50.5_n3_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n3_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_pca_5.50.5_nb_n3_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n3_nb_fit_re)
diff_tda_pca_5.50.5_nb_n3_3_fold
##        Accuracy
## 1 -0.0112838380
## 2  0.0009446122
## 3 -0.0122102093
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n3_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n3_3_fold$probRight
bst_tda_pca_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.3229667
## 
## $winRope
## [1] 0.6770333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_3_fold
## $left
## [1] 0.331151
## 
## $rope
## [1] 0.6338555
## 
## $right
## [1] 0.03499351
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
#bf_tda_pca_5.50.5_nb.n3_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n3_3_fold)
## t = -1.7732, df = 2, p-value = 0.2182
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02575538  0.01072242
## sample estimates:
##    mean of x 
## -0.007516478
### Test set diff
diff_tda_pca_5.50.5_nb.n3_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n3_test
##   Accuracy 
## 0.01279689
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n3_test_odds.left<-bst_tda_pca_5.50.5_nb.n3_test$probLeft/bst_tda_pca_5.50.5_nb.n3_test$probRight
bst_tda_pca_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Fixed copy-paste error: this Node3 section previously re-ran the Node2
# comparison (bsr_..._nb.n2_test on diff_..._nb.n2_test). It now tests the
# Node3 test-set accuracy difference, matching the pattern of the n1/n2/n4
# sections (ROPE of [-0.01, 0.01]).
bsr_tda_pca_5.50.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1586333
## 
## $winRight
## [1] 0.8413667
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n3_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n3_test)) #bf_tda_pca_5.50.5_nb.n3_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n3_test))

##Node4

# Train a caret naive-Bayes model (method = 'nb') on the TDA mapper node-4
# vectorized data, selecting by Accuracy under `fitControl` (3-fold CV per
# the printed summary). As in the other node sections, the usekernel=FALSE
# candidate fails on zero-variance one-hot columns (see warnings below),
# leaving the kernel-density estimate as the fitted model.
Adult_TDA_PC_5.50.5_n4_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n4.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.India, V14.Iran, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Jamaica, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.50.5_n4_NbFit0
## Naive Bayes 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11133, 11133, 11134 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9449102    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9448536     0    Fold1
## 2 0.9448536     0    Fold2
## 3 0.9450234     0    Fold3
ad_tda_pc_5.50.5_n4_nb_fit_re<-Adult_TDA_PC_5.50.5_n4_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n4_NbFit0 from training data based on testing data
# (pred0 is a scratch name reused by each node section)
pred0 <- predict(Adult_TDA_PC_5.50.5_n4_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
# (predicted classes vs. the true labels in adult.one_hot_df4Test$adult_df1)
ad_tda_pc_5.50.5_n4_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n4_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
ad_tda_pc_5.50.5_n4_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n4_nb_cf0$byClass[5:7]


###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers

### 3-fold diff

diff_tda_pca_5.50.5_nb_n4_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n4_nb_fit_re)
diff_tda_pca_5.50.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1845535
## 2 -0.1697812
## 3 -0.1858762
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n4_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n4_3_fold$probRight
bst_tda_pca_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9902
## 
## $winRope
## [1] 0.0098
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9993877
## 
## $rope
## [1] 0.0001218899
## 
## $right
## [1] 0.0004903664
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
#bf_tda_pca_5.50.5_nb.n4_3_fold

#t_test
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n4_3_fold)
## t = -34.906, df = 2, p-value = 0.0008197
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2022664 -0.1578742
## sample estimates:
##  mean of x 
## -0.1800703
### Test set diff
diff_tda_pca_5.50.5_nb.n4_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n4_test
##   Accuracy 
## 0.01320639
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n4_test_odds.left<-bst_tda_pca_5.50.5_nb.n4_test$probLeft/bst_tda_pca_5.50.5_nb.n4_test$probRight
bst_tda_pca_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4594667
## 
## $winRight
## [1] 0.5405333
# Bayesian Correlated Test

bct_tda_pca_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n4_test,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n4_test)) #bf_tda_pca_5.50.5_nb.n4_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n4_test))

##Node5

# Fit a caret Naive Bayes model ('nb') on the TDA(PCA) node-5 data using the
# shared fitControl resampling scheme (3-fold CV per the output below).
Adult_TDA_PC_5.50.5_n5_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n5.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.voc, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted model (resampling summary and selected tuning parameters)
Adult_TDA_PC_5.50.5_n5_NbFit0
## Naive Bayes 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9602, 9604, 9602 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9979867    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_PC_5.50.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9979175     0    Fold1
## 2 0.9981250     0    Fold2
## 3 0.9979175     0    Fold3
# Keep only the per-fold Accuracy column for the Bayesian comparisons below
ad_tda_pc_5.50.5_n5_nb_fit_re<-Adult_TDA_PC_5.50.5_n5_NbFit0$resample[1]

summary(Adult_TDA_PC_5.50.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_PC_5.50.5_n5_NbFit0 from training data based on testing data
# NOTE(review): pred0 is reused by every node section; fine sequentially.
pred0 <- predict(Adult_TDA_PC_5.50.5_n5_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.50.5_n5_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix shown above
ad_tda_pc_5.50.5_n5_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_pc_5.50.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Overall test-set accuracy, kept for the test-set difference below
ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc<-ad_tda_pc_5.50.5_n5_nb_cf0$overall[1]
ad_tda_pc_5.50.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision, Recall, F1 (byClass elements 5:7) for the positive class
ad_tda_pc_5.50.5_n5_nb_cf0_pre_rec_f1<-ad_tda_pc_5.50.5_n5_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
# (header previously said "RF"; this section compares Naive Bayes models)

### 3-fold diff

# Per-fold CV accuracy differences: baseline NB minus TDA(PCA) node-5 NB
diff_tda_pca_5.50.5_nb_n5_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.50.5_n5_nb_fit_re)
diff_tda_pca_5.50.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2376174
## 2 -0.2230526
## 3 -0.2387704
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 

# Posterior odds left vs. right of the ROPE (Inf here since probRight is 0)
bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left<-bst_tda_pca_5.50.5_nb.n5_3_fold$probLeft/bst_tda_pca_5.50.5_nb.n5_3_fold$probRight
bst_tda_pca_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

# BUG FIX: this call originally passed diff_tda_pca_5.50.5_nb_n4_3_fold (the
# node-4 differences), a copy-paste slip; the node-5 section must test the
# node-5 differences. The recorded output below came from the original n4 call
# and will change on rerun.
bsr_tda_pca_5.50.5_nb.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9917
## 
## $winRope
## [1] 0.0083
## 
## $winRight
## [1] 0
# Bayesian Correlated Test

# Correlated Bayesian t-test on the node-5 3-fold differences
# (rho = 0.1, ROPE = [-0.01, 0.01])
bct_tda_pca_5.50.5_nb.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9996578
## 
## $rope
## [1] 5.392945e-05
## 
## $right
## [1] 0.0002882463
# Rope Plot
plot(rope(diff_tda_pca_5.50.5_nb_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold))
#bf_tda_pca_5.50.5_nb.n5_3_fold

#t_test
# Frequentist check on the same 3-fold differences
t.test(as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_pca_5.50.5_nb_n5_3_fold)
## t = -46.094, df = 2, p-value = 0.0004703
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2549099 -0.2113837
## sample estimates:
##  mean of x 
## -0.2331468
### Test set diff
# Held-out test-set accuracy difference: baseline NB minus TDA(PCA)-assisted
# NB (node 5); a single scalar observation.
diff_tda_pca_5.50.5_nb.n5_test<-(nb_cf_ov_acc - ad_tda_pc_5.50.5_n5_nb_cf0_ov_acc)
diff_tda_pca_5.50.5_nb.n5_test
##   Accuracy 
## 0.01320639
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_pca_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_pca_5.50.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_pca_5.50.5_nb.n5_test_odds.left<-bst_tda_pca_5.50.5_nb.n5_test$probLeft/bst_tda_pca_5.50.5_nb.n5_test$probRight
bst_tda_pca_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_pca_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_pca_5.50.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.4610667
## 
## $winRight
## [1] 0.5389333
# Bayesian Correlated Test

# NOTE(review): single observation -> the correlated t-test returns NA
bct_tda_pca_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.50.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.50.5_nb.n5_test, c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.50.5_nb.n5_test)) #bf_tda_pca_5.50.5_nb.n5_test

#t_test
#t.test(as.matrix(diff_tda_pca_5.50.5_nb.n5_test))

##With TDA KDE filter 5 intervals, 50% overlap, 5 bins 
##Node1

# Fit NB on the TDA KDE-filter node-1 data (note the kde-prefixed dataset,
# unlike the PCA-filter sections above)
Adult_TDA_KDE_5.50.5_n1_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n1.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted model (resampling summary and selected tuning parameters)
Adult_TDA_KDE_5.50.5_n1_NbFit0
## Naive Bayes 
## 
## 13387 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8924, 8925, 8925 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa    
##   FALSE            NaN        NaN
##    TRUE      0.7541639  0.0741204
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n1_NbFit0$resample
##    Accuracy      Kappa Resample
## 1 0.7613713 0.11345994    Fold1
## 2 0.7449574 0.02413639    Fold2
## 3 0.7561632 0.08476488    Fold3
# Keep only the per-fold Accuracy column for the Bayesian comparisons below
ad_tda_kde_5.50.5_n1_nb_fit_re<-Adult_TDA_KDE_5.50.5_n1_NbFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n1_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_KDE_5.50.5_n1_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n1_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n1_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n1_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2195
##      >50K       0   157
##                                           
##                Accuracy : 0.7753          
##                  95% CI : (0.7669, 0.7835)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 9.556e-05       
##                                           
##                   Kappa : 0.098           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.06675         
##          Pos Pred Value : 0.77162         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.98393         
##       Balanced Accuracy : 0.53338         
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix shown above
ad_tda_kde_5.50.5_n1_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2195
##      >50K       0   157
##                                           
##                Accuracy : 0.7753          
##                  95% CI : (0.7669, 0.7835)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 9.556e-05       
##                                           
##                   Kappa : 0.098           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 1.00000         
##             Specificity : 0.06675         
##          Pos Pred Value : 0.77162         
##          Neg Pred Value : 1.00000         
##              Prevalence : 0.75921         
##          Detection Rate : 0.75921         
##    Detection Prevalence : 0.98393         
##       Balanced Accuracy : 0.53338         
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n1_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   7.752867e-01   9.796737e-02   7.668767e-01   7.835322e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   9.556408e-05   0.000000e+00
# Overall test-set accuracy, kept for the test-set difference below
ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n1_nb_cf0$overall[1]
# BUG FIX: was "$byClas1" (typo), which silently returned NULL in the original
# run; "$byClass" prints the per-class statistics as in the other node sections.
ad_tda_kde_5.50.5_n1_nb_cf0$byClass
# Precision, Recall, F1 (byClass elements 5:7) for the positive class
ad_tda_kde_5.50.5_n1_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n1_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
# (header previously said "RF"; this section compares Naive Bayes models)

### 3-fold diff

# Per-fold CV accuracy differences: baseline NB minus TDA(KDE) node-1 NB
diff_tda_kde_5.50.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n1_nb_fit_re)
diff_tda_kde_5.50.5_nb_n1_3_fold
##       Accuracy
## 1 -0.001071156
## 2  0.030114969
## 3  0.002983988
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.75
## 
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n1_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n1_3_fold$probRight
bst_tda_kde_5.50.5_nb.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.5771333
## 
## $winRight
## [1] 0.4228667
# Bayesian Correlated Test

bct_tda_kde_5.50.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_3_fold
## $left
## [1] 0.1044478
## 
## $rope
## [1] 0.3744305
## 
## $right
## [1] 0.5211218
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
#bf_tda_kde_5.50.5_nb.n1_3_fold

#t_test
# Frequentist check on the same 3-fold differences
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n1_3_fold)
## t = 1.0905, df = 2, p-value = 0.3894
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.03144600  0.05279787
## sample estimates:
##  mean of x 
## 0.01067593
### Test set diff
# BUG FIX: the baseline here was svm_cf_ov_acc (apparently copy-pasted from
# the SVM sections); this section compares Naive Bayes classifiers, so use
# the NB baseline test accuracy, matching the PCA-filter NB sections above.
# NOTE: the recorded output below (0.0758) came from the original SVM-baseline
# call and will change on rerun.
diff_tda_kde_5.50.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_kde_5.50.5_n1_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n1_test
##   Accuracy 
## 0.07575758
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n1_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n1_test_odds.left<-bst_tda_kde_5.50.5_nb.n1_test$probLeft/bst_tda_kde_5.50.5_nb.n1_test$probRight
bst_tda_kde_5.50.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n1_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1569333
## 
## $winRight
## [1] 0.8430667
# Bayesian Correlated Test

# NOTE(review): single observation -> the correlated t-test returns NA
bct_tda_kde_5.50.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n1_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n1_test, c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n1_test)) #bf_tda_pca_5.50.5_nb.n1_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n1_test))

##Node2

# BUG FIX(review): this KDE section trained on tda.m_adult_5.50.5.n2.vec
# (the PCA-filter node-2 data), apparently a copy-paste slip -- node 1 above
# uses the kde-prefixed dataset. Confirm tda.m_kde_adult_5.50.5.n2.vec exists
# upstream before rerunning; the recorded output below is from the original call.
Adult_TDA_KDE_5.50.5_n2_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.50.5.n2.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Cambodia, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the fitted model (resampling summary and selected tuning parameters)
Adult_TDA_KDE_5.50.5_n2_NbFit0
## Naive Bayes 
## 
## 12206 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8138, 8138, 8136 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa    
##   FALSE            NaN        NaN
##    TRUE      0.6282283  0.2926769
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n2_NbFit0$resample
##    Accuracy     Kappa Resample
## 1 0.7082104 0.4265867    Fold1
## 2 0.6258604 0.2898944    Fold2
## 3 0.5506143 0.1615495    Fold3
# Keep only the per-fold Accuracy column for the Bayesian comparisons below
ad_tda_kde_5.50.5_n2_nb_fit_re<-Adult_TDA_KDE_5.50.5_n2_NbFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n2_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_KDE_5.50.5_n2_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.50.5_n2_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n2_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n2_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3584  1595
##      >50K    3832   757
##                                           
##                Accuracy : 0.4444          
##                  95% CI : (0.4345, 0.4543)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1471         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.4833          
##             Specificity : 0.3219          
##          Pos Pred Value : 0.6920          
##          Neg Pred Value : 0.1650          
##              Prevalence : 0.7592          
##          Detection Rate : 0.3669          
##    Detection Prevalence : 0.5302          
##       Balanced Accuracy : 0.4026          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate print of the same confusion matrix shown above
ad_tda_kde_5.50.5_n2_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   3584  1595
##      >50K    3832   757
##                                           
##                Accuracy : 0.4444          
##                  95% CI : (0.4345, 0.4543)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.1471         
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 0.4833          
##             Specificity : 0.3219          
##          Pos Pred Value : 0.6920          
##          Neg Pred Value : 0.1650          
##              Prevalence : 0.7592          
##          Detection Rate : 0.3669          
##    Detection Prevalence : 0.5302          
##       Balanced Accuracy : 0.4026          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n2_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   4.444103e-01  -1.470993e-01   4.345229e-01   4.543309e-01   7.592138e-01 
## AccuracyPValue  McnemarPValue 
##   1.000000e+00  2.341485e-202
# Overall test-set accuracy, kept for the test-set difference below
ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n2_nb_cf0$overall[1]
ad_tda_kde_5.50.5_n2_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            0.4832794            0.3218537            0.6920255 
##       Neg Pred Value            Precision               Recall 
##            0.1649597            0.6920255            0.4832794 
##                   F1           Prevalence       Detection Rate 
##            0.5691147            0.7592138            0.3669124 
## Detection Prevalence    Balanced Accuracy 
##            0.5302007            0.4025666
# Precision, Recall, F1 (byClass elements 5:7) for the positive class
ad_tda_kde_5.50.5_n2_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n2_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
# (header previously said "RF"; this section compares Naive Bayes models)

### 3-fold diff

# Per-fold CV accuracy differences: baseline NB minus TDA(KDE) node-2 NB
diff_tda_kde_5.50.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n2_nb_fit_re)
diff_tda_kde_5.50.5_nb_n2_3_fold
##    Accuracy
## 1 0.0520897
## 2 0.1492120
## 3 0.2085329
## Bayesian Tests 3-fold diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_3_fold
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n2_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n2_3_fold$probRight
bst_tda_kde_5.50.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_3_fold
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.0098
## 
## $winRight
## [1] 0.9902
# Bayesian Correlated Test

bct_tda_kde_5.50.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_3_fold
## $left
## [1] 0.05420179
## 
## $rope
## [1] 0.01480854
## 
## $right
## [1] 0.9309897
# Rope Plot
plot(rope(diff_tda_kde_5.50.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
#bf_tda_kde_5.50.5_nb.n2_3_fold

#t_test
# Frequentist check on the same 3-fold differences
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n2_3_fold)
## t = 2.996, df = 2, p-value = 0.09569
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.05958342  0.33280648
## sample estimates:
## mean of x 
## 0.1366115
### Test set diff
# BUG FIX: the baseline here was svm_cf_ov_acc (apparently copy-pasted from
# the SVM sections); this section compares Naive Bayes classifiers, so use
# the NB baseline test accuracy, matching the PCA-filter NB sections above.
# NOTE: the recorded output below (0.4066) came from the original SVM-baseline
# call and will change on rerun.
diff_tda_kde_5.50.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_kde_5.50.5_n2_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n2_test
##  Accuracy 
## 0.4066339
## Bayesian Tests Test set diff

# Bayesian Sign Test

bst_tda_kde_5.50.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n2_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n2_test_odds.left<-bst_tda_kde_5.50.5_nb.n2_test$probLeft/bst_tda_kde_5.50.5_nb.n2_test$probRight
bst_tda_kde_5.50.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n2_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1568333
## 
## $winRight
## [1] 0.8431667
# Bayesian Correlated Test

# NOTE(review): single observation -> the correlated t-test returns NA
bct_tda_kde_5.50.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n2_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.50.5_nb.n2_test)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n2_test)) #bf_tda_kde_5.50.5_nb.n2_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n2_test))

##Node3
# Fit a caret Naive Bayes model (method = 'nb') on the node-3 Mapper/KDE
# feature vectors, with the resampling settings held in fitControl
# (3-fold CV per the printed model summary below).
# The knitted warnings that follow show every usekernel=FALSE fit fails
# because some one-hot dummy columns have zero variance within a class;
# only the kernel-density variant produces resampled accuracies.

Adult_TDA_KDE_5.50.5_n3_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n3.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Outlying.US.Guam.USVI.etc., V14.Portugal, V14.Thailand
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Display the fitted model (kernel NB selected: usekernel = TRUE)
Adult_TDA_KDE_5.50.5_n3_NbFit0
## Naive Bayes 
## 
## 13240 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 8828, 8826, 8826 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.7714502    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n3_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.7715322     0    Fold1
## 2 0.7714092     0    Fold2
## 3 0.7714092     0    Fold3
# Keep only the per-fold Accuracy column (3x1 data.frame) for later diffs
ad_tda_kde_5.50.5_n3_nb_fit_re<-Adult_TDA_KDE_5.50.5_n3_NbFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n3_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_KDE_5.50.5_n3_NbFit0 from training data based on testing data
# (pred0 is a shared scratch variable; each node section overwrites it)
pred0 <- predict(Adult_TDA_KDE_5.50.5_n3_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n3_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n3_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2348
##      >50K       0     4
##                                          
##                Accuracy : 0.7596         
##                  95% CI : (0.751, 0.7681)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.4678         
##                                          
##                   Kappa : 0.0026         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 1.000000       
##             Specificity : 0.001701       
##          Pos Pred Value : 0.759525       
##          Neg Pred Value : 1.000000       
##              Prevalence : 0.759214       
##          Detection Rate : 0.759214       
##    Detection Prevalence : 0.999590       
##       Balanced Accuracy : 0.500850       
##                                          
##        'Positive' Class :  <=50K         
## 
# NOTE(review): duplicate display -- the same confusion matrix was printed
# just above; kept verbatim so the knitted output stays in sync.
ad_tda_kde_5.50.5_n3_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2348
##      >50K       0     4
##                                          
##                Accuracy : 0.7596         
##                  95% CI : (0.751, 0.7681)
##     No Information Rate : 0.7592         
##     P-Value [Acc > NIR] : 0.4678         
##                                          
##                   Kappa : 0.0026         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 1.000000       
##             Specificity : 0.001701       
##          Pos Pred Value : 0.759525       
##          Neg Pred Value : 1.000000       
##              Prevalence : 0.759214       
##          Detection Rate : 0.759214       
##    Detection Prevalence : 0.999590       
##       Balanced Accuracy : 0.500850       
##                                          
##        'Positive' Class :  <=50K         
## 
ad_tda_kde_5.50.5_n3_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##    0.759623260    0.002580085    0.751021356    0.768070101    0.759213759 
## AccuracyPValue  McnemarPValue 
##    0.467802791    0.000000000
# Named scalar test-set accuracy, used by the test-set diff section below
ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n3_nb_cf0$overall[1]
ad_tda_kde_5.50.5_n3_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##           1.00000000           0.00170068           0.75952478 
##       Neg Pred Value            Precision               Recall 
##           1.00000000           0.75952478           1.00000000 
##                   F1           Prevalence       Detection Rate 
##           0.86332945           0.75921376           0.75921376 
## Detection Prevalence    Balanced Accuracy 
##           0.99959050           0.50085034
# Precision / Recall / F1 (byClass elements 5:7) for the ' <=50K' class
ad_tda_kde_5.50.5_n3_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n3_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of the baseline NB vs. tda-assisted NB
###### classifiers (NOTE(review): this header previously said "RF", but both
###### models in this section were fit with method = 'nb')

### 3-fold diff
# Per-fold accuracy differences: baseline NB resamples minus node-3 TDA NB

diff_tda_kde_5.50.5_nb_n3_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n3_nb_fit_re)
diff_tda_kde_5.50.5_nb_n3_3_fold
##       Accuracy
## 1 -0.011232066
## 2  0.003663235
## 3 -0.012262009
## Bayesian Tests 3-fold diff

# Bayesian Sign Test (ROPE (-0.01, 0.01))

bst_tda_kde_5.50.5_nb.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n3_3_fold
## $probLeft
## [1] 0.5
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# (Inf because probRight is exactly 0)

bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n3_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n3_3_fold$probRight
bst_tda_kde_5.50.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n3_3_fold
## $winLeft
## [1] 0.3242667
## 
## $winRope
## [1] 0.6757333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test (correlation 0.1, ROPE (-0.01, 0.01))

bct_tda_kde_5.50.5_nb.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n3_3_fold
## $left
## [1] 0.3129359
## 
## $rope
## [1] 0.6332286
## 
## $right
## [1] 0.05383551
# Rope Plot: posterior mass inside/outside the ROPE for the fold differences
plot(rope(diff_tda_kde_5.50.5_nb_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
#bf_tda_kde_5.50.5_nb.n3_3_fold

#t_test
# Frequentist one-sample t-test on the 3 fold differences (df = 2)
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n3_3_fold)
## t = -1.2847, df = 2, p-value = 0.3276
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.02874895  0.01552839
## sample estimates:
##   mean of x 
## -0.00661028
### Test set diff
# Single held-out test-set accuracy difference for the node-3 model.
# NOTE(review): baseline here is svm_cf_ov_acc (an SVM accuracy) rather
# than an NB baseline as in the 3-fold comparison -- confirm intent.
diff_tda_kde_5.50.5_nb.n3_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n3_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n3_test
##   Accuracy 
## 0.09142097
## Bayesian Tests Test set diff

# Bayesian Sign Test (single observation; ROPE (-0.01, 0.01))

bst_tda_kde_5.50.5_nb.n3_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n3_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n3_test_odds.left<-bst_tda_kde_5.50.5_nb.n3_test$probLeft/bst_tda_kde_5.50.5_nb.n3_test$probRight
bst_tda_kde_5.50.5_nb.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n3_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n3_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1598667
## 
## $winRight
## [1] 0.8401333
# Bayesian Correlated Test
# Single observation -> no variance estimate, hence the NA results below.

bct_tda_kde_5.50.5_nb.n3_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n3_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n3_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# (disabled; original commented line had an extra ')' and omitted the
# ROPE bounds -- corrected form kept below for reference)
#plot(rope(diff_tda_kde_5.50.5_nb.n3_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n3_test)) #bf_tda_kde_5.50.5_nb.n3_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n3_test))


##Node4
# Fit the caret Naive Bayes model on the node-4 Mapper/KDE feature vectors
# (same fitControl resampling as the other nodes). As before, the
# usekernel=FALSE fits fail on zero-variance dummy columns (see the
# warnings below); only the kernel variant is resampled.

Adult_TDA_KDE_5.50.5_n4_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n4.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.Greece, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Display the fitted model (kernel NB selected: usekernel = TRUE)
Adult_TDA_KDE_5.50.5_n4_NbFit0
## Naive Bayes 
## 
## 16700 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 11134, 11133, 11133 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9449102    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n4_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9450234     0    Fold1
## 2 0.9448536     0    Fold2
## 3 0.9448536     0    Fold3
# Keep only the per-fold Accuracy column (3x1 data.frame) for later diffs
ad_tda_kde_5.50.5_n4_nb_fit_re<-Adult_TDA_KDE_5.50.5_n4_NbFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n4_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_KDE_5.50.5_n4_NbFit0 from training data based on testing data
# (pred0 is a shared scratch variable; each node section overwrites it)
pred0 <- predict(Adult_TDA_KDE_5.50.5_n4_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n4_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n4_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate display -- the same confusion matrix was printed
# just above; kept verbatim so the knitted output stays in sync. Note the
# model predicts ' <=50K' for every test row (Kappa = 0, Specificity = 0).
ad_tda_kde_5.50.5_n4_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n4_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Named scalar test-set accuracy, used by the test-set diff section below
ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n4_nb_cf0$overall[1]
ad_tda_kde_5.50.5_n4_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision / Recall / F1 (byClass elements 5:7) for the ' <=50K' class
ad_tda_kde_5.50.5_n4_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n4_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of the baseline NB vs. tda-assisted NB
###### classifiers (NOTE(review): this header previously said "RF", but both
###### models in this section were fit with method = 'nb')

### 3-fold diff
# Per-fold accuracy differences: baseline NB resamples minus node-4 TDA NB

diff_tda_kde_5.50.5_nb_n4_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n4_nb_fit_re)
diff_tda_kde_5.50.5_nb_n4_3_fold
##     Accuracy
## 1 -0.1847232
## 2 -0.1697812
## 3 -0.1857065
## Bayesian Tests 3-fold diff

# Bayesian Sign Test (ROPE (-0.01, 0.01))

bst_tda_kde_5.50.5_nb.n4_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# (Inf because probRight is exactly 0)

bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n4_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n4_3_fold$probRight
bst_tda_kde_5.50.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n4_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n4_3_fold
## $winLeft
## [1] 0.9916
## 
## $winRope
## [1] 0.0084
## 
## $winRight
## [1] 0
# Bayesian Correlated Test (correlation 0.1, ROPE (-0.01, 0.01))

bct_tda_kde_5.50.5_nb.n4_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n4_3_fold
## $left
## [1] 0.9993892
## 
## $rope
## [1] 0.0001215921
## 
## $right
## [1] 0.000489166
# Rope Plot: posterior mass inside/outside the ROPE for the fold differences
plot(rope(diff_tda_kde_5.50.5_nb_n4_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
#bf_tda_kde_5.50.5_nb.n4_3_fold

#t_test
# Frequentist one-sample t-test on the 3 fold differences (df = 2)
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n4_3_fold)
## t = -34.949, df = 2, p-value = 0.0008177
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2022392 -0.1579015
## sample estimates:
##  mean of x 
## -0.1800703
### Test set diff
# Single held-out test-set accuracy difference for the node-4 model.
# NOTE(review): baseline here is svm_cf_ov_acc (an SVM accuracy) rather
# than an NB baseline as in the 3-fold comparison -- confirm intent.
diff_tda_kde_5.50.5_nb.n4_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n4_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n4_test
##   Accuracy 
## 0.09183047
## Bayesian Tests Test set diff

# Bayesian Sign Test (single observation; ROPE (-0.01, 0.01))

bst_tda_kde_5.50.5_nb.n4_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n4_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n4_test_odds.left<-bst_tda_kde_5.50.5_nb.n4_test$probLeft/bst_tda_kde_5.50.5_nb.n4_test$probRight
bst_tda_kde_5.50.5_nb.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n4_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n4_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1612
## 
## $winRight
## [1] 0.8388
# Bayesian Correlated Test
# Single observation -> no variance estimate, hence the NA results below.

bct_tda_kde_5.50.5_nb.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n4_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n4_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# (disabled; original commented line had an extra ')' and omitted the
# ROPE bounds -- corrected form kept below for reference)
#plot(rope(diff_tda_kde_5.50.5_nb.n4_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n4_test)) #bf_tda_kde_5.50.5_nb.n4_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n4_test))

##Node5
# Fit the caret Naive Bayes model on the node-5 Mapper/KDE feature vectors
# (same fitControl resampling as the other nodes). The warnings below list
# far more zero-variance-within-class dummy columns than nodes 3/4, so all
# usekernel=FALSE fits fail and only the kernel variant is resampled.

Adult_TDA_KDE_5.50.5_n5_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.50.5.n5.vec, 
                method = 'nb', 
                trControl = fitControl,
                metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Prof.specialty, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Farming.fishing, V7.Handlers.cleaners, V7.Priv.house.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) : 
##   Zero variances for at least one class in variables: V2.., V2.Local.gov, V2.Never.worked, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.Without.pay, V4.10th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Protective.serv, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Other, V10.Female, V10.Male, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Display the fitted model (kernel NB selected: usekernel = TRUE)
Adult_TDA_KDE_5.50.5_n5_NbFit0
## Naive Bayes 
## 
## 14404 samples
##   108 predictor
##     2 classes: ' <=50K', ' >50K' 
## 
## No pre-processing
## Resampling: Cross-Validated (3 fold) 
## Summary of sample sizes: 9604, 9602, 9602 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy   Kappa
##   FALSE            NaN  NaN  
##    TRUE      0.9979867    0  
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
Adult_TDA_KDE_5.50.5_n5_NbFit0$resample
##    Accuracy Kappa Resample
## 1 0.9981250     0    Fold1
## 2 0.9979175     0    Fold2
## 3 0.9979175     0    Fold3
# Keep only the per-fold Accuracy column (3x1 data.frame) for later diffs
ad_tda_kde_5.50.5_n5_nb_fit_re<-Adult_TDA_KDE_5.50.5_n5_NbFit0$resample[1]

summary(Adult_TDA_KDE_5.50.5_n5_NbFit0)
##             Length Class      Mode     
## apriori       2    table      numeric  
## tables      108    -none-     list     
## levels        2    -none-     character
## call          6    -none-     call     
## x           108    data.frame list     
## usekernel     1    -none-     logical  
## varnames    108    -none-     character
## xNames      108    -none-     character
## problemType   1    -none-     character
## tuneValue     3    data.frame list     
## obsLevels     2    -none-     character
## param         0    -none-     list
# Predict outcome using Adult_TDA_KDE_5.50.5_n5_NbFit0 from training data based on testing data
# (pred0 is a shared scratch variable; each node section overwrites it)
pred0 <- predict(Adult_TDA_KDE_5.50.5_n5_NbFit0, newdata= adult.one_hot_df4Test)

# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.50.5_n5_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.50.5_n5_nb_cf0
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
# NOTE(review): duplicate display -- the same confusion matrix was printed
# just above; kept verbatim so the knitted output stays in sync. Despite a
# 0.998 CV accuracy on the node-5 subset, the model predicts ' <=50K' for
# every test row (Kappa = 0, Specificity = 0).
ad_tda_kde_5.50.5_n5_nb_cf0 
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  <=50K  >50K
##      <=50K   7416  2352
##      >50K       0     0
##                                           
##                Accuracy : 0.7592          
##                  95% CI : (0.7506, 0.7677)
##     No Information Rate : 0.7592          
##     P-Value [Acc > NIR] : 0.5055          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : <2e-16          
##                                           
##             Sensitivity : 1.0000          
##             Specificity : 0.0000          
##          Pos Pred Value : 0.7592          
##          Neg Pred Value :    NaN          
##              Prevalence : 0.7592          
##          Detection Rate : 0.7592          
##    Detection Prevalence : 1.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class :  <=50K          
## 
ad_tda_kde_5.50.5_n5_nb_cf0$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7592138      0.0000000      0.7506071      0.7676657      0.7592138 
## AccuracyPValue  McnemarPValue 
##      0.5055358      0.0000000
# Named scalar test-set accuracy, used by the test-set diff section below
ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc<-ad_tda_kde_5.50.5_n5_nb_cf0$overall[1]
ad_tda_kde_5.50.5_n5_nb_cf0$byClass
##          Sensitivity          Specificity       Pos Pred Value 
##            1.0000000            0.0000000            0.7592138 
##       Neg Pred Value            Precision               Recall 
##                  NaN            0.7592138            1.0000000 
##                   F1           Prevalence       Detection Rate 
##            0.8631285            0.7592138            0.7592138 
## Detection Prevalence    Balanced Accuracy 
##            1.0000000            0.5000000
# Precision / Recall / F1 (byClass elements 5:7) for the ' <=50K' class
ad_tda_kde_5.50.5_n5_nb_cf0_pre_rec_f1<-ad_tda_kde_5.50.5_n5_nb_cf0$byClass[5:7]

###### Conduct initial Bayesian tests of the baseline NB vs. tda-assisted NB
###### classifiers (NOTE(review): this header previously said "RF", but both
###### models in this section were fit with method = 'nb')

### 3-fold diff
# Per-fold accuracy differences: baseline NB resamples minus node-5 TDA NB

diff_tda_kde_5.50.5_nb_n5_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.50.5_n5_nb_fit_re)
diff_tda_kde_5.50.5_nb_n5_3_fold
##     Accuracy
## 1 -0.2378249
## 2 -0.2228451
## 3 -0.2387704
## Bayesian Tests 3-fold diff

# Bayesian Sign Test (ROPE (-0.01, 0.01))

bst_tda_kde_5.50.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
## 
## $probRope
## [1] 0.25
## 
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test 
# (Inf because probRight is exactly 0)

bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left<-bst_tda_kde_5.50.5_nb.n5_3_fold$probLeft/bst_tda_kde_5.50.5_nb.n5_3_fold$probRight
bst_tda_kde_5.50.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n5_3_fold
## $winLeft
## [1] 0.9906667
## 
## $winRope
## [1] 0.009333333
## 
## $winRight
## [1] 0
# Bayesian Correlated Test (correlation 0.1, ROPE (-0.01, 0.01))

bct_tda_kde_5.50.5_nb.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n5_3_fold
## $left
## [1] 0.9996442
## 
## $rope
## [1] 5.607864e-05
## 
## $right
## [1] 0.0002997458
# Rope Plot: posterior mass inside/outside the ROPE for the fold differences
plot(rope(diff_tda_kde_5.50.5_nb_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
#bf_tda_kde_5.50.5_nb.n5_3_fold

#t_test
# Frequentist one-sample t-test on the 3 fold differences (df = 2)
t.test(as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold))
## 
##  One Sample t-test
## 
## data:  as.matrix(diff_tda_kde_5.50.5_nb_n5_3_fold)
## t = -45.201, df = 2, p-value = 0.0004891
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2553401 -0.2109535
## sample estimates:
##  mean of x 
## -0.2331468
### Test set diff
# Single held-out test-set accuracy difference for the node-5 model.
# NOTE(review): baseline here is svm_cf_ov_acc (an SVM accuracy) rather
# than an NB baseline as in the 3-fold comparison -- confirm intent.
diff_tda_kde_5.50.5_nb.n5_test<-(svm_cf_ov_acc - ad_tda_kde_5.50.5_n5_nb_cf0_ov_acc)
diff_tda_kde_5.50.5_nb.n5_test
##   Accuracy 
## 0.09183047
## Bayesian Tests Test set diff

# Bayesian Sign Test (single observation; ROPE (-0.01, 0.01))

bst_tda_kde_5.50.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bst_tda_kde_5.50.5_nb.n5_test
## $probLeft
## [1] 0
## 
## $probRope
## [1] 0.5
## 
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test 

bst_tda_kde_5.50.5_nb.n5_test_odds.left<-bst_tda_kde_5.50.5_nb.n5_test$probLeft/bst_tda_kde_5.50.5_nb.n5_test$probRight
bst_tda_kde_5.50.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test

bsr_tda_kde_5.50.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),-0.01,0.01)
bsr_tda_kde_5.50.5_nb.n5_test
## $winLeft
## [1] 0
## 
## $winRope
## [1] 0.1561667
## 
## $winRight
## [1] 0.8438333
# Bayesian Correlated Test
# Single observation -> no variance estimate, hence the NA results below.

bct_tda_kde_5.50.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.50.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_kde_5.50.5_nb.n5_test
## $left
## [1] NA
## 
## $rope
## [1] NA
## 
## $right
## [1] NA
# Rope Plot
# (disabled; original commented line had an extra ')' and omitted the
# ROPE bounds -- corrected form kept below for reference)
#plot(rope(diff_tda_kde_5.50.5_nb.n5_test, c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.50.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.50.5_nb.n5_test)) #bf_tda_kde_5.50.5_nb.n5_test

#t_test
#t.test(as.matrix(diff_tda_kde_5.50.5_nb.n5_test))